* [PATCH] Arm64: convert soft_restart() to assembly code
@ 2014-08-12 12:42 Arun Chandran
  2014-08-12 14:05 ` Mark Rutland
                   ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Arun Chandran @ 2014-08-12 12:42 UTC (permalink / raw)
  To: linux-arm-kernel

soft_restart() will fail on arm64 systems that do not
guarantee the flushing of the cache to PoC with flush_cache_all().

soft_restart(addr)
{
	push_to_stack(addr);

	Do mm setup for restart;
	Flush&turnoff D-cache;

	pop_from_stack(addr); --> fails here as addr is not at PoC
	cpu_reset(addr) --> Jumps to invalid address
}

So convert the whole code to assembly to make sure that addr
is not pushed to stack.

Signed-off-by: Arun Chandran <achandran@mvista.com>
---
 arch/arm64/include/asm/proc-fns.h    |    1 +
 arch/arm64/include/asm/system_misc.h |    1 -
 arch/arm64/kernel/process.c          |   38 ++--------------------------------
 arch/arm64/mm/proc.S                 |   34 ++++++++++++++++++++++++++++++
 4 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 0c657bb..e18d5d0 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -32,6 +32,7 @@ extern void cpu_cache_off(void);
 extern void cpu_do_idle(void);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+extern void cpu_soft_restart(unsigned long addr) __attribute__((noreturn));
 extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
 extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
 
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 7a18fab..659fbf5 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -41,7 +41,6 @@ struct mm_struct;
 extern void show_pte(struct mm_struct *mm, unsigned long addr);
 extern void __show_regs(struct pt_regs *);
 
-void soft_restart(unsigned long);
 extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 
 #define UDBG_UNDEFINED	(1 << 0)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 1309d64..3ca1ade 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -57,40 +57,6 @@ unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
 #endif
 
-static void setup_restart(void)
-{
-	/*
-	 * Tell the mm system that we are going to reboot -
-	 * we may need it to insert some 1:1 mappings so that
-	 * soft boot works.
-	 */
-	setup_mm_for_reboot();
-
-	/* Clean and invalidate caches */
-	flush_cache_all();
-
-	/* Turn D-cache off */
-	cpu_cache_off();
-
-	/* Push out any further dirty data, and ensure cache is empty */
-	flush_cache_all();
-}
-
-void soft_restart(unsigned long addr)
-{
-	typedef void (*phys_reset_t)(unsigned long);
-	phys_reset_t phys_reset;
-
-	setup_restart();
-
-	/* Switch to the identity mapping */
-	phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
-	phys_reset(addr);
-
-	/* Should never get here */
-	BUG();
-}
-
 /*
  * Function pointers to optional machine specific functions
  */
@@ -162,8 +128,8 @@ void machine_power_off(void)
 
 /*
  * Restart requires that the secondary CPUs stop performing any activity
- * while the primary CPU resets the system. Systems with a single CPU can
- * use soft_restart() as their machine descriptor's .restart hook, since that
+ * while the primary CPU resets the system. Systems with a single CPU can use
+ * cpu_soft_restart() as their machine descriptor's .restart hook, since that
  * will cause the only available CPU to reset. Systems with multiple CPUs must
 * provide a HW restart implementation, to ensure that all CPUs reset at once.
  * This is required so that any code running after reset on the primary CPU
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7736779..a7c3fce 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -76,6 +76,40 @@ ENTRY(cpu_reset)
 	ret	x0
 ENDPROC(cpu_reset)
 
+	.align 3
+1:	.quad	memstart_addr
+
+ENTRY(cpu_soft_restart)
+	adr	x1, cpu_reset
+	adr	x2, 1b
+
+	/* virt_to_phys(cpu_reset) */
+	ldr	x3, [x2]
+	ldr	x3, [x3]
+	mov	x4, #1
+	lsl	x4, x4, #(VA_BITS - 1)
+	add	x1, x1, x4
+	add	x1, x1, x3
+
+	/* Save it; We can't use stack as it is going to run with caches OFF */
+	mov	x19, x0
+	mov	x20, x1
+
+	bl	setup_mm_for_reboot
+
+	bl	flush_cache_all
+	/* Turn D-cache off */
+	bl	cpu_cache_off
+	/* Push out any further dirty data, and ensure cache is empty */
+	bl	flush_cache_all
+
+	mov	x0, x19
+
+	br	x20
+	/* It should never come back */
+	bl	panic
+ENDPROC(cpu_soft_restart)
+
 /*
  *	cpu_do_idle()
  *
-- 
1.7.9.5


* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-12 12:42 [PATCH] Arm64: convert soft_restart() to assembly code Arun Chandran
@ 2014-08-12 14:05 ` Mark Rutland
  2014-08-13  4:57   ` Arun Chandran
  2014-08-13  7:43 ` [PATCH] Arm64: convert part of soft_restart() to assembly Arun Chandran
  2014-08-15 17:20 ` [PATCH] Arm64: convert soft_restart() to assembly code Geoff Levand
  2 siblings, 1 reply; 33+ messages in thread
From: Mark Rutland @ 2014-08-12 14:05 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Arun,

On Tue, Aug 12, 2014 at 01:42:45PM +0100, Arun Chandran wrote:
> soft_restart() will fail on arm64 systems that do not
> guarantee the flushing of the cache to PoC with flush_cache_all().

The flush_cache_all function is never guaranteed to flush data to the
PoC, so I wouldn't say that this is only broken on some systems. We just
happen to have been lucky so far.

> soft_restart(addr)
> {
> 	push_to_stack(addr);
> 
> 	Do mm setup for restart;
> 	Flush&turnoff D-cache;
> 
> 	pop_from_stack(addr); --> fails here as addr is not at PoC
> 	cpu_reset(addr) --> Jumps to invalid address
> }

This doesn't match the organisation of soft_restart, which makes it a
little confusing.

>
> So convert the whole code to assembly to make sure that addr
> is not pushed to stack.

It's possible that the compiler would previously have spilled other
values too; we only observed addr being spilled.

How about we reword this something like:

The current soft_restart and setup_restart implementations incorrectly
assume that the compiler will not spill/fill values to/from the stack,
but the compiler has been observed to do so around the cache being
disabled, resulting in stale values being restored.

As we cannot control the compiler's spilling behaviour we must rewrite
the functions in assembly to avoid use of the stack.

> 
> Signed-off-by: Arun Chandran <achandran@mvista.com>
> ---
>  arch/arm64/include/asm/proc-fns.h    |    1 +
>  arch/arm64/include/asm/system_misc.h |    1 -
>  arch/arm64/kernel/process.c          |   38 ++--------------------------------
>  arch/arm64/mm/proc.S                 |   34 ++++++++++++++++++++++++++++++
>  4 files changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
> index 0c657bb..e18d5d0 100644
> --- a/arch/arm64/include/asm/proc-fns.h
> +++ b/arch/arm64/include/asm/proc-fns.h
> @@ -32,6 +32,7 @@ extern void cpu_cache_off(void);
>  extern void cpu_do_idle(void);
>  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
>  extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
> +extern void cpu_soft_restart(unsigned long addr) __attribute__((noreturn));
>  extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
>  extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
>  
> diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
> index 7a18fab..659fbf5 100644
> --- a/arch/arm64/include/asm/system_misc.h
> +++ b/arch/arm64/include/asm/system_misc.h
> @@ -41,7 +41,6 @@ struct mm_struct;
>  extern void show_pte(struct mm_struct *mm, unsigned long addr);
>  extern void __show_regs(struct pt_regs *);
>  
> -void soft_restart(unsigned long);
>  extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
>  
>  #define UDBG_UNDEFINED	(1 << 0)
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 1309d64..3ca1ade 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -57,40 +57,6 @@ unsigned long __stack_chk_guard __read_mostly;
>  EXPORT_SYMBOL(__stack_chk_guard);
>  #endif
>  
> -static void setup_restart(void)
> -{
> -	/*
> -	 * Tell the mm system that we are going to reboot -
> -	 * we may need it to insert some 1:1 mappings so that
> -	 * soft boot works.
> -	 */
> -	setup_mm_for_reboot();
> -
> -	/* Clean and invalidate caches */
> -	flush_cache_all();
> -
> -	/* Turn D-cache off */
> -	cpu_cache_off();
> -
> -	/* Push out any further dirty data, and ensure cache is empty */
> -	flush_cache_all();
> -}
> -
> -void soft_restart(unsigned long addr)
> -{
> -	typedef void (*phys_reset_t)(unsigned long);
> -	phys_reset_t phys_reset;
> -
> -	setup_restart();
> -
> -	/* Switch to the identity mapping */
> -	phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
> -	phys_reset(addr);
> -
> -	/* Should never get here */
> -	BUG();
> -}
> -
>  /*
>   * Function pointers to optional machine specific functions
>   */
> @@ -162,8 +128,8 @@ void machine_power_off(void)
>  
>  /*
>   * Restart requires that the secondary CPUs stop performing any activity
> - * while the primary CPU resets the system. Systems with a single CPU can
> - * use soft_restart() as their machine descriptor's .restart hook, since that
> + * while the primary CPU resets the system. Systems with a single CPU can use
> + * cpu_soft_restart() as their machine descriptor's .restart hook, since that
>   * will cause the only available CPU to reset. Systems with multiple CPUs must
>   * provide a HW restart implementation, to ensure that all CPUs reset at once.
>   * This is required so that any code running after reset on the primary CPU
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 7736779..a7c3fce 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -76,6 +76,40 @@ ENTRY(cpu_reset)
>  	ret	x0
>  ENDPROC(cpu_reset)
>  
> +	.align 3
> +1:	.quad	memstart_addr
> +
> +ENTRY(cpu_soft_restart)
> +	adr	x1, cpu_reset
> +	adr	x2, 1b
> +
> +	/* virt_to_phys(cpu_reset) */
> +	ldr	x3, [x2]
> +	ldr	x3, [x3]
> +	mov	x4, #1
> +	lsl	x4, x4, #(VA_BITS - 1)
> +	add	x1, x1, x4
> +	add	x1, x1, x3

Rather than having a custom virt_to_phys here, I think it would be
better to leave that address translation and table setup in c, and have
the asm be a smaller function which disables and cleans the cache, then
branches to the provided address.

So we'd still have a soft_restart function in c that looked something
like:

void soft_restart(unsigned long addr)
{
	setup_mm_for_reboot();
	cpu_soft_restart(virt_to_phys(cpu_reset), addr);
}
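
The cpu_soft_restart declaration would then need to take the physical
address of cpu_reset as an extra argument, i.e. something like:

void cpu_soft_restart(phys_addr_t cpu_reset,
		unsigned long addr) __attribute__((noreturn));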

> +
> +	/* Save it; We can't use stack as it is going to run with caches OFF */
> +	mov	x19, x0
> +	mov	x20, x1
> +
> +	bl	setup_mm_for_reboot
> +
> +	bl	flush_cache_all

This first cache flush can be dropped entirely.

> +	/* Turn D-cache off */
> +	bl	cpu_cache_off
> +	/* Push out any further dirty data, and ensure cache is empty */
> +	bl	flush_cache_all
> +
> +	mov	x0, x19
> +
> +	br	x20
> +	/* It should never come back */
> +	bl	panic

Given we didn't use blr x20, won't we get stuck in an infinite loop here
if we returned from the address in x20?

I don't think panic will work here given we disabled the MMU and caches.
A branch to self is probably the best we can do if it's worth doing
anything.

Thanks,
Mark.


* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-12 14:05 ` Mark Rutland
@ 2014-08-13  4:57   ` Arun Chandran
  0 siblings, 0 replies; 33+ messages in thread
From: Arun Chandran @ 2014-08-13  4:57 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

Thank you for reviewing the code.

On Tue, Aug 12, 2014 at 7:35 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Arun,
>
> On Tue, Aug 12, 2014 at 01:42:45PM +0100, Arun Chandran wrote:
>> soft_restart() will fail on arm64 systems that do not
>> guarantee the flushing of the cache to PoC with flush_cache_all().
>
> The flush_cache_all function is never guaranteed to flush data to the
> PoC, so I wouldn't say that this is only broken on some systems. We just
> happen to have been lucky so far.
>
>> soft_restart(addr)
>> {
>>       push_to_stack(addr);
>>
>>       Do mm setup for restart;
>>       Flush&turnoff D-cache;
>>
>>       pop_from_stack(addr); --> fails here as addr is not at PoC
>>       cpu_reset(addr) --> Jumps to invalid address
>> }
>
> This doesn't match the organisation of soft_restart, which makes it a
> little confusing.
>
>>
>> So convert the whole code to assembly to make sure that addr
>> is not pushed to stack.
>
> It's possible that the compiler would previously have spilled other
> values too; we only observed addr being spilled.
>
> How about we reword this something like:
>
> The current soft_restart and setup_restart implementations incorrectly
> assume that the compiler will not spill/fill values to/from the stack,
> but the compiler has been observed to do so around the cache being
> disabled, resulting in stale values being restored.
>
That is better. I will send an updated patch soon.

> As we cannot control the compiler's spilling behaviour we must rewrite
> the functions in assembly to avoid use of the stack.
>
>>
>> Signed-off-by: Arun Chandran <achandran@mvista.com>
>> ---
>>  arch/arm64/include/asm/proc-fns.h    |    1 +
>>  arch/arm64/include/asm/system_misc.h |    1 -
>>  arch/arm64/kernel/process.c          |   38 ++--------------------------------
>>  arch/arm64/mm/proc.S                 |   34 ++++++++++++++++++++++++++++++
>>  4 files changed, 37 insertions(+), 37 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
>> index 0c657bb..e18d5d0 100644
>> --- a/arch/arm64/include/asm/proc-fns.h
>> +++ b/arch/arm64/include/asm/proc-fns.h
>> @@ -32,6 +32,7 @@ extern void cpu_cache_off(void);
>>  extern void cpu_do_idle(void);
>>  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
>>  extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
>> +extern void cpu_soft_restart(unsigned long addr) __attribute__((noreturn));
>>  extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
>>  extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
>>
>> diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
>> index 7a18fab..659fbf5 100644
>> --- a/arch/arm64/include/asm/system_misc.h
>> +++ b/arch/arm64/include/asm/system_misc.h
>> @@ -41,7 +41,6 @@ struct mm_struct;
>>  extern void show_pte(struct mm_struct *mm, unsigned long addr);
>>  extern void __show_regs(struct pt_regs *);
>>
>> -void soft_restart(unsigned long);
>>  extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
>>
>>  #define UDBG_UNDEFINED       (1 << 0)
>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index 1309d64..3ca1ade 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -57,40 +57,6 @@ unsigned long __stack_chk_guard __read_mostly;
>>  EXPORT_SYMBOL(__stack_chk_guard);
>>  #endif
>>
>> -static void setup_restart(void)
>> -{
>> -     /*
>> -      * Tell the mm system that we are going to reboot -
>> -      * we may need it to insert some 1:1 mappings so that
>> -      * soft boot works.
>> -      */
>> -     setup_mm_for_reboot();
>> -
>> -     /* Clean and invalidate caches */
>> -     flush_cache_all();
>> -
>> -     /* Turn D-cache off */
>> -     cpu_cache_off();
>> -
>> -     /* Push out any further dirty data, and ensure cache is empty */
>> -     flush_cache_all();
>> -}
>> -
>> -void soft_restart(unsigned long addr)
>> -{
>> -     typedef void (*phys_reset_t)(unsigned long);
>> -     phys_reset_t phys_reset;
>> -
>> -     setup_restart();
>> -
>> -     /* Switch to the identity mapping */
>> -     phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
>> -     phys_reset(addr);
>> -
>> -     /* Should never get here */
>> -     BUG();
>> -}
>> -
>>  /*
>>   * Function pointers to optional machine specific functions
>>   */
>> @@ -162,8 +128,8 @@ void machine_power_off(void)
>>
>>  /*
>>   * Restart requires that the secondary CPUs stop performing any activity
>> - * while the primary CPU resets the system. Systems with a single CPU can
>> - * use soft_restart() as their machine descriptor's .restart hook, since that
>> + * while the primary CPU resets the system. Systems with a single CPU can use
>> + * cpu_soft_restart() as their machine descriptor's .restart hook, since that
>>   * will cause the only available CPU to reset. Systems with multiple CPUs must
>>   * provide a HW restart implementation, to ensure that all CPUs reset at once.
>>   * This is required so that any code running after reset on the primary CPU
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 7736779..a7c3fce 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -76,6 +76,40 @@ ENTRY(cpu_reset)
>>       ret     x0
>>  ENDPROC(cpu_reset)
>>
>> +     .align 3
>> +1:   .quad   memstart_addr
>> +
>> +ENTRY(cpu_soft_restart)
>> +     adr     x1, cpu_reset
>> +     adr     x2, 1b
>> +
>> +     /* virt_to_phys(cpu_reset) */
>> +     ldr     x3, [x2]
>> +     ldr     x3, [x3]
>> +     mov     x4, #1
>> +     lsl     x4, x4, #(VA_BITS - 1)
>> +     add     x1, x1, x4
>> +     add     x1, x1, x3
>
> Rather than having a custom virt_to_phys here, I think it would be
> better to leave that address translation and table setup in c, and have
> the asm be a smaller function which disables and cleans the cache, then
> branches to the provided address.
>
> So we'd still have a soft_restart function in c that looked something
> like:
>
> void soft_restart(unsigned long addr)
> {
>         setup_mm_for_reboot();
>         cpu_soft_restart(virt_to_phys(cpu_reset), addr);
> }
>

Ok. I also thought of it, but my mind was stuck on converting soft_restart() to
assembly.

>> +
>> +     /* Save it; We can't use stack as it is going to run with caches OFF */
>> +     mov     x19, x0
>> +     mov     x20, x1
>> +
>> +     bl      setup_mm_for_reboot
>> +
>> +     bl      flush_cache_all
>
> This first cache flush can be dropped entirely.
>

Ok.
>> +     /* Turn D-cache off */
>> +     bl      cpu_cache_off
>> +     /* Push out any further dirty data, and ensure cache is empty */
>> +     bl      flush_cache_all
>> +
>> +     mov     x0, x19
>> +
>> +     br      x20
>> +     /* It should never come back */
>> +     bl      panic
>
> Given we didn't use blr x20, won't we get stuck in an infinite loop here
> if we returned from the address in x20?
>
> I don't think panic will work here given we disabled the MMU and caches.
> A branch to self is probably the best we can do if it's worth doing
> anything.
>

Ok.

--Arun


* [PATCH] Arm64: convert part of soft_restart() to assembly
  2014-08-12 12:42 [PATCH] Arm64: convert soft_restart() to assembly code Arun Chandran
  2014-08-12 14:05 ` Mark Rutland
@ 2014-08-13  7:43 ` Arun Chandran
  2014-08-13 10:58   ` Mark Rutland
  2014-08-15 17:20 ` [PATCH] Arm64: convert soft_restart() to assembly code Geoff Levand
  2 siblings, 1 reply; 33+ messages in thread
From: Arun Chandran @ 2014-08-13  7:43 UTC (permalink / raw)
  To: linux-arm-kernel

The current soft_restart() and setup_restart() implementations incorrectly
assume that the compiler will not spill/fill values to/from the stack. However,
this assumption seems to be wrong, as revealed by the disassembly of the
existing code.

Pseudo code for disassembly looks like

soft_restart(addr)
{
	__push_to_stack(addr)

	branch to setup_mm_for_reboot()
	branch to flush_cache_all() --> This is unnecessary
	branch to cpu_cache_off()
	branch to flush_cache_all() --> Not guaranteed of flushing to PoC

	__pop_from_stack(addr) --> Fails here as addr is not at PoC

	cpu_reset(addr) --> cpu_reset receives invalid reset address
}

The compiler is clearly spilling here around the cache being disabled,
resulting in stale values being restored.  As we cannot control the compiler's
spilling behaviour we must rewrite the functions in assembly to
avoid use of the stack.

Signed-off-by: Arun Chandran <achandran@mvista.com>
---
 arch/arm64/include/asm/proc-fns.h |    2 ++
 arch/arm64/kernel/process.c       |   30 ++----------------------------
 arch/arm64/mm/proc.S              |   14 ++++++++++++++
 3 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 0c657bb..86be4f9 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -32,6 +32,8 @@ extern void cpu_cache_off(void);
 extern void cpu_do_idle(void);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+extern void cpu_soft_restart(phys_addr_t cpu_reset,
+		unsigned long addr) __attribute__((noreturn));
 extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
 extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 1309d64..bf66922 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -57,36 +57,10 @@ unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
 #endif
 
-static void setup_restart(void)
-{
-	/*
-	 * Tell the mm system that we are going to reboot -
-	 * we may need it to insert some 1:1 mappings so that
-	 * soft boot works.
-	 */
-	setup_mm_for_reboot();
-
-	/* Clean and invalidate caches */
-	flush_cache_all();
-
-	/* Turn D-cache off */
-	cpu_cache_off();
-
-	/* Push out any further dirty data, and ensure cache is empty */
-	flush_cache_all();
-}
-
 void soft_restart(unsigned long addr)
 {
-	typedef void (*phys_reset_t)(unsigned long);
-	phys_reset_t phys_reset;
-
-	setup_restart();
-
-	/* Switch to the identity mapping */
-	phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
-	phys_reset(addr);
-
+	setup_mm_for_reboot();
+	cpu_soft_restart(virt_to_phys(cpu_reset), addr);
 	/* Should never get here */
 	BUG();
 }
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7736779..0eff5ee 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -76,6 +76,20 @@ ENTRY(cpu_reset)
 	ret	x0
 ENDPROC(cpu_reset)
 
+ENTRY(cpu_soft_restart)
+	/* Save address of cpu_reset() and reset address */
+	mov	x19, x0
+	mov	x20, x1
+
+	/* Turn D-cache off */
+	bl	cpu_cache_off
+	/* Push out all dirty data, and ensure cache is empty */
+	bl	flush_cache_all
+
+	mov	x0, x20
+	ret	x19
+ENDPROC(cpu_soft_restart)
+
 /*
  *	cpu_do_idle()
  *
-- 
1.7.9.5


* [PATCH] Arm64: convert part of soft_restart() to assembly
  2014-08-13  7:43 ` [PATCH] Arm64: convert part of soft_restart() to assembly Arun Chandran
@ 2014-08-13 10:58   ` Mark Rutland
  2014-08-13 11:17     ` Arun Chandran
  0 siblings, 1 reply; 33+ messages in thread
From: Mark Rutland @ 2014-08-13 10:58 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Arun,

On Wed, Aug 13, 2014 at 08:43:21AM +0100, Arun Chandran wrote:
> The current soft_restart() and setup_restart() implementations incorrectly
> assume that the compiler will not spill/fill values to/from the stack. However,
> this assumption seems to be wrong, as revealed by the disassembly of the
> existing code.
> 
> Pseudo code for disassembly looks like
> 
> soft_restart(addr)
> {
> 	__push_to_stack(addr)
> 
> 	branch to setup_mm_for_reboot()
> 	branch to flush_cache_all() --> This is unnecessary
> 	branch to cpu_cache_off()
> 	branch to flush_cache_all() --> Not guaranteed of flushing to PoC
> 
> 	__pop_from_stack(addr) --> Fails here as addr is not at PoC
> 
> 	cpu_reset(addr) --> cpu_reset receives invalid reset address
> }

As I mentioned before, I think having pseudocode here is confusing.
Either we should have a real disassembly or we should drop it. I get the
following when I build a v3.16 arm64 defconfig with Linaro GCC
4.9-2014.05:

ffffffc000085224 <soft_restart>:
ffffffc000085224:       a9be7bfd        stp     x29, x30, [sp,#-32]!
ffffffc000085228:       910003fd        mov     x29, sp
ffffffc00008522c:       f9000fa0        str     x0, [x29,#24]
ffffffc000085230:       94003b16        bl      ffffffc000093e88 <setup_mm_for_reboot>
ffffffc000085234:       94003927        bl      ffffffc0000936d0 <flush_cache_all>
ffffffc000085238:       94003bf2        bl      ffffffc000094200 <cpu_cache_off>
ffffffc00008523c:       94003925        bl      ffffffc0000936d0 <flush_cache_all>
ffffffc000085240:       b00031c1        adrp    x1, ffffffc0006be000 <reset_devices>
ffffffc000085244:       f9400fa0        ldr     x0, [x29,#24]
ffffffc000085248:       f941c822        ldr     x2, [x1,#912]
ffffffc00008524c:       f0000061        adrp    x1, ffffffc000094000 <set_mm_context+0x10>
ffffffc000085250:       91088021        add     x1, x1, #0x220
ffffffc000085254:       8b010041        add     x1, x2, x1
ffffffc000085258:       d2c00802        mov     x2, #0x4000000000               // #274877906944
ffffffc00008525c:       8b020021        add     x1, x1, x2
ffffffc000085260:       d63f0020        blr     x1
...

The two ldrs correspond to the spilled addr variable and memstart_addr
respectively.

> 
> The compiler is clearly spilling here around the cache being disabled,
> resulting in stale values being restored.  As we cannot control the compiler's

Nit: double spacing here doesn't match the rest of the message.

> spilling behaviour we must rewrite the functions in assembly to
> avoid use of the stack.
> 
> Signed-off-by: Arun Chandran <achandran@mvista.com>
> ---
>  arch/arm64/include/asm/proc-fns.h |    2 ++
>  arch/arm64/kernel/process.c       |   30 ++----------------------------
>  arch/arm64/mm/proc.S              |   14 ++++++++++++++
>  3 files changed, 18 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
> index 0c657bb..86be4f9 100644
> --- a/arch/arm64/include/asm/proc-fns.h
> +++ b/arch/arm64/include/asm/proc-fns.h
> @@ -32,6 +32,8 @@ extern void cpu_cache_off(void);
>  extern void cpu_do_idle(void);
>  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
>  extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
> +extern void cpu_soft_restart(phys_addr_t cpu_reset,
> +		unsigned long addr) __attribute__((noreturn));
>  extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
>  extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
>  
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 1309d64..bf66922 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -57,36 +57,10 @@ unsigned long __stack_chk_guard __read_mostly;
>  EXPORT_SYMBOL(__stack_chk_guard);
>  #endif
>  
> -static void setup_restart(void)
> -{
> -	/*
> -	 * Tell the mm system that we are going to reboot -
> -	 * we may need it to insert some 1:1 mappings so that
> -	 * soft boot works.
> -	 */
> -	setup_mm_for_reboot();
> -
> -	/* Clean and invalidate caches */
> -	flush_cache_all();
> -
> -	/* Turn D-cache off */
> -	cpu_cache_off();
> -
> -	/* Push out any further dirty data, and ensure cache is empty */
> -	flush_cache_all();
> -}
> -
>  void soft_restart(unsigned long addr)
>  {
> -	typedef void (*phys_reset_t)(unsigned long);
> -	phys_reset_t phys_reset;
> -
> -	setup_restart();
> -
> -	/* Switch to the identity mapping */
> -	phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
> -	phys_reset(addr);
> -
> +	setup_mm_for_reboot();
> +	cpu_soft_restart(virt_to_phys(cpu_reset), addr);
>  	/* Should never get here */
>  	BUG();
>  }
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 7736779..0eff5ee 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -76,6 +76,20 @@ ENTRY(cpu_reset)
>  	ret	x0
>  ENDPROC(cpu_reset)
>  
> +ENTRY(cpu_soft_restart)
> +	/* Save address of cpu_reset() and reset address */
> +	mov	x19, x0
> +	mov	x20, x1
> +
> +	/* Turn D-cache off */
> +	bl	cpu_cache_off
> +	/* Push out all dirty data, and ensure cache is empty */
> +	bl	flush_cache_all
> +
> +	mov	x0, x20
> +	ret	x19
> +ENDPROC(cpu_soft_restart)

The code change looks good to me.

Cheers,
Mark.


* [PATCH] Arm64: convert part of soft_restart() to assembly
  2014-08-13 10:58   ` Mark Rutland
@ 2014-08-13 11:17     ` Arun Chandran
  2014-08-13 11:21       ` Mark Rutland
  0 siblings, 1 reply; 33+ messages in thread
From: Arun Chandran @ 2014-08-13 11:17 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

> On Wed, Aug 13, 2014 at 08:43:21AM +0100, Arun Chandran wrote:
>> The current soft_restart() and setup_restart() implementations incorrectly
>> assume that the compiler will not spill/fill values to/from the stack. However,
>> this assumption seems to be wrong, as revealed by the disassembly of the
>> existing code.
>>
>> Pseudo code for disassembly looks like
>>
>> soft_restart(addr)
>> {
>>       __push_to_stack(addr)
>>
>>       branch to setup_mm_for_reboot()
>>       branch to flush_cache_all() --> This is unnecessary
>>       branch to cpu_cache_off()
>>       branch to flush_cache_all() --> Not guaranteed of flushing to PoC
>>
>>       __pop_from_stack(addr) --> Fails here as addr is not at PoC
>>
>>       cpu_reset(addr) --> cpu_reset receives invalid reset address
>> }
>
> As I mentioned before, I think having pseudocode here is confusing.
> Either we should have a real disassembly or we should drop it. I get the
> following when I build a v3.16 arm64 defconfig with Linaro GCC
> 4.9-2014.05:
>

Hmm. I think it is better to drop it, as different compilers give different
output. My compiler's output matches the commit message, but
yours does not. I will send another one soon.

> ffffffc000085224 <soft_restart>:
> ffffffc000085224:       a9be7bfd        stp     x29, x30, [sp,#-32]!
> ffffffc000085228:       910003fd        mov     x29, sp
> ffffffc00008522c:       f9000fa0        str     x0, [x29,#24]
> ffffffc000085230:       94003b16        bl      ffffffc000093e88 <setup_mm_for_reboot>
> ffffffc000085234:       94003927        bl      ffffffc0000936d0 <flush_cache_all>
> ffffffc000085238:       94003bf2        bl      ffffffc000094200 <cpu_cache_off>
> ffffffc00008523c:       94003925        bl      ffffffc0000936d0 <flush_cache_all>
> ffffffc000085240:       b00031c1        adrp    x1, ffffffc0006be000 <reset_devices>
> ffffffc000085244:       f9400fa0        ldr     x0, [x29,#24]
> ffffffc000085248:       f941c822        ldr     x2, [x1,#912]
> ffffffc00008524c:       f0000061        adrp    x1, ffffffc000094000 <set_mm_context+0x10>
> ffffffc000085250:       91088021        add     x1, x1, #0x220
> ffffffc000085254:       8b010041        add     x1, x2, x1
> ffffffc000085258:       d2c00802        mov     x2, #0x4000000000               // #274877906944
> ffffffc00008525c:       8b020021        add     x1, x1, x2
> ffffffc000085260:       d63f0020        blr     x1
> ...
>
> The two ldrs correspond to the spilled addr variable and memstart_addr
> respectively.
>
>>
>> The compiler is clearly spilling here around the cache being disabled,
>> resulting in stale values being restored.  As we cannot control the compiler's
>
> Nit: double spacing here doesn't match the rest of the message.
>
>> spilling behaviour we must rewrite the functions in assembly to
>> avoid use of the stack.
>>
>> Signed-off-by: Arun Chandran <achandran@mvista.com>
>> ---
>>  arch/arm64/include/asm/proc-fns.h |    2 ++
>>  arch/arm64/kernel/process.c       |   30 ++----------------------------
>>  arch/arm64/mm/proc.S              |   14 ++++++++++++++
>>  3 files changed, 18 insertions(+), 28 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
>> index 0c657bb..86be4f9 100644
>> --- a/arch/arm64/include/asm/proc-fns.h
>> +++ b/arch/arm64/include/asm/proc-fns.h
>> @@ -32,6 +32,8 @@ extern void cpu_cache_off(void);
>>  extern void cpu_do_idle(void);
>>  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
>>  extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
>> +extern void cpu_soft_restart(phys_addr_t cpu_reset,
>> +             unsigned long addr) __attribute__((noreturn));
>>  extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
>>  extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
>>
>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index 1309d64..bf66922 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -57,36 +57,10 @@ unsigned long __stack_chk_guard __read_mostly;
>>  EXPORT_SYMBOL(__stack_chk_guard);
>>  #endif
>>
>> -static void setup_restart(void)
>> -{
>> -     /*
>> -      * Tell the mm system that we are going to reboot -
>> -      * we may need it to insert some 1:1 mappings so that
>> -      * soft boot works.
>> -      */
>> -     setup_mm_for_reboot();
>> -
>> -     /* Clean and invalidate caches */
>> -     flush_cache_all();
>> -
>> -     /* Turn D-cache off */
>> -     cpu_cache_off();
>> -
>> -     /* Push out any further dirty data, and ensure cache is empty */
>> -     flush_cache_all();
>> -}
>> -
>>  void soft_restart(unsigned long addr)
>>  {
>> -     typedef void (*phys_reset_t)(unsigned long);
>> -     phys_reset_t phys_reset;
>> -
>> -     setup_restart();
>> -
>> -     /* Switch to the identity mapping */
>> -     phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
>> -     phys_reset(addr);
>> -
>> +     setup_mm_for_reboot();
>> +     cpu_soft_restart(virt_to_phys(cpu_reset), addr);
>>       /* Should never get here */
>>       BUG();
>>  }
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 7736779..0eff5ee 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -76,6 +76,20 @@ ENTRY(cpu_reset)
>>       ret     x0
>>  ENDPROC(cpu_reset)
>>
>> +ENTRY(cpu_soft_restart)
>> +     /* Save address of cpu_reset() and reset address */
>> +     mov     x19, x0
>> +     mov     x20, x1
>> +
>> +     /* Turn D-cache off */
>> +     bl      cpu_cache_off
>> +     /* Push out all dirty data, and ensure cache is empty */
>> +     bl      flush_cache_all
>> +
>> +     mov     x0, x20
>> +     ret     x19
>> +ENDPROC(cpu_soft_restart)
>
> The code change looks good to me.

OK. Thanks

--Arun


* [PATCH] Arm64: convert part of soft_restart() to assembly
  2014-08-13 11:17     ` Arun Chandran
@ 2014-08-13 11:21       ` Mark Rutland
  0 siblings, 0 replies; 33+ messages in thread
From: Mark Rutland @ 2014-08-13 11:21 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Aug 13, 2014 at 12:17:54PM +0100, Arun Chandran wrote:
> Hi Mark,
> 
> > On Wed, Aug 13, 2014 at 08:43:21AM +0100, Arun Chandran wrote:
> >> The current soft_restart() and setup_restart() implementations incorrectly
> >> assume that the compiler will not spill/fill values to/from the stack. However,
> >> this assumption seems to be wrong, as revealed by the disassembly of the
> >> existing code.
> >>
> >> Pseudo code for disassembly looks like
> >>
> >> soft_restart(addr)
> >> {
> >>       __push_to_stack(addr)
> >>
> >>       branch to setup_mm_for_reboot()
> >>       branch to flush_cache_all() --> This is unnecessary
> >>       branch to cpu_cache_off()
> >>       branch to flush_cache_all() --> Not guaranteed of flushing to PoC
> >>
> >>       __pop_from_stack(addr) --> Fails here as addr is not at PoC
> >>
> >>       cpu_reset(addr) --> cpu_reset receives invalid reset address
> >> }
> >
> > As I mentioned before, I think having pseudocode here is confusing.
> > Either we should have a real disassembly or we should drop it. I get the
> > following when I build a v3.16 arm64 defconfig with Linaro GCC
> > 4.9-2014.05:
> >
> 
> Hmm. I think it is better to drop it, as different compilers give different
> output. My compiler's output matches the commit message, but
> yours does not. I will send another one soon.

Well, either output would be fine as an example. I'd just like to see
the real asm rather than pseudocode.

Cheers,
Mark.


* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-12 12:42 [PATCH] Arm64: convert soft_restart() to assembly code Arun Chandran
  2014-08-12 14:05 ` Mark Rutland
  2014-08-13  7:43 ` [PATCH] Arm64: convert part of soft_restart() to assembly Arun Chandran
@ 2014-08-15 17:20 ` Geoff Levand
  2014-08-15 18:21   ` Mark Rutland
  2 siblings, 1 reply; 33+ messages in thread
From: Geoff Levand @ 2014-08-15 17:20 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Arun,

On Tue, 2014-08-12 at 18:12 +0530, Arun Chandran wrote:
> soft_restart() will fail on arm64 systems that do not
> guarantee the flushing of the cache to PoC with flush_cache_all().
> 
> soft_restart(addr)
> {
> 	push_to_stack(addr);
> 
> 	Do mm setup for restart;
> 	Flush&turnoff D-cache;
> 
> 	pop_from_stack(addr); --> fails here as addr is not at PoC
> 	cpu_reset(addr) --> Jumps to invalid address
> }

For the cpu-ops shutdown I'm working on I need a call to move the
secondary processors to an identity mapped spin loop after the identity
map is enabled.  I want to do this in C code, so it needs to happen
after the identity map is enabled, and before the dcache is disabled.

I think to do this we can keep the existing soft_restart(addr) routine
with something like this:

void soft_restart(unsigned long addr)
{
	setup_mm_for_reboot();

#if defined(CONFIG_SMP)
	smp_secondary_shutdown();
#endif

	cpu_soft_restart(addr);

	/* Should never get here */
	BUG();
}


> 
> So convert the whole code to assembly to make sure that addr
> is not pushed to stack.
> 
> Signed-off-by: Arun Chandran <achandran@mvista.com>
> ---
>  arch/arm64/include/asm/proc-fns.h    |    1 +
>  arch/arm64/include/asm/system_misc.h |    1 -
>  arch/arm64/kernel/process.c          |   38 ++--------------------------------
>  arch/arm64/mm/proc.S                 |   34 ++++++++++++++++++++++++++++++
>  4 files changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
> index 0c657bb..e18d5d0 100644
> --- a/arch/arm64/include/asm/proc-fns.h
> +++ b/arch/arm64/include/asm/proc-fns.h
> @@ -32,6 +32,7 @@ extern void cpu_cache_off(void);
>  extern void cpu_do_idle(void);
>  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
>  extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
> +extern void cpu_soft_restart(unsigned long addr) __attribute__((noreturn));

Function prototypes are never definitions, so remove this 'extern'
keyword.  checkpatch should have warned about this.  If it did not,
report it to the checkpatch maintainers.
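
For example, the declaration this patch adds would then simply read:

void cpu_soft_restart(unsigned long addr) __attribute__((noreturn));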

>  extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
>  extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
>  
> diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
> index 7a18fab..659fbf5 100644
> --- a/arch/arm64/include/asm/system_misc.h
> +++ b/arch/arm64/include/asm/system_misc.h
> @@ -41,7 +41,6 @@ struct mm_struct;
>  extern void show_pte(struct mm_struct *mm, unsigned long addr);
>  extern void __show_regs(struct pt_regs *);
>  
> -void soft_restart(unsigned long);
>  extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
>  
>  #define UDBG_UNDEFINED	(1 << 0)
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 1309d64..3ca1ade 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -57,40 +57,6 @@ unsigned long __stack_chk_guard __read_mostly;
>  EXPORT_SYMBOL(__stack_chk_guard);
>  #endif
>  
> -static void setup_restart(void)
> -{
> -	/*
> -	 * Tell the mm system that we are going to reboot -
> -	 * we may need it to insert some 1:1 mappings so that
> -	 * soft boot works.
> -	 */
> -	setup_mm_for_reboot();
> -
> -	/* Clean and invalidate caches */
> -	flush_cache_all();
> -
> -	/* Turn D-cache off */
> -	cpu_cache_off();
> -
> -	/* Push out any further dirty data, and ensure cache is empty */
> -	flush_cache_all();
> -}
> -
> -void soft_restart(unsigned long addr)
> -{
> -	typedef void (*phys_reset_t)(unsigned long);
> -	phys_reset_t phys_reset;
> -
> -	setup_restart();
> -
> -	/* Switch to the identity mapping */
> -	phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
> -	phys_reset(addr);
> -
> -	/* Should never get here */
> -	BUG();
> -}
> -
>  /*
>   * Function pointers to optional machine specific functions
>   */
> @@ -162,8 +128,8 @@ void machine_power_off(void)
>  
>  /*
>   * Restart requires that the secondary CPUs stop performing any activity
> - * while the primary CPU resets the system. Systems with a single CPU can
> - * use soft_restart() as their machine descriptor's .restart hook, since that
> + * while the primary CPU resets the system. Systems with a single CPU can use
> + * cpu_soft_restart() as their machine descriptor's .restart hook, since that
>   * will cause the only available CPU to reset. Systems with multiple CPUs must
>   * provide a HW restart implementation, to ensure that all CPUs reset at once.
>   * This is required so that any code running after reset on the primary CPU
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 7736779..a7c3fce 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -76,6 +76,40 @@ ENTRY(cpu_reset)
>  	ret	x0
>  ENDPROC(cpu_reset)
>  
> +	.align 3
> +1:	.quad	memstart_addr
> +
> +ENTRY(cpu_soft_restart)
> +	adr	x1, cpu_reset
> +	adr	x2, 1b
> +
> +	/* virt_to_phys(cpu_reset) */
> +	ldr	x3, [x2]
> +	ldr	x3, [x3]
> +	mov	x4, #1
> +	lsl	x4, x4, #(VA_BITS - 1)
> +	add	x1, x1, x4
> +	add	x1, x1, x3
> +
> +	/* Save it; We can't use stack as it is going to run with caches OFF */
> +	mov	x19, x0
> +	mov	x20, x1
> +
> +	bl	setup_mm_for_reboot
> +
> +	bl	flush_cache_all
> +	/* Turn D-cache off */
> +	bl	cpu_cache_off
> +	/* Push out any further dirty data, and ensure cache is empty */
> +	bl	flush_cache_all

It would be nice to have some blank lines above the comments.  Same
below.

> +	mov	x0, x19
> +
> +	br	x20
> +	/* It should never come back */
> +	bl	panic
> +ENDPROC(cpu_soft_restart)
> +
>  /*
>   *	cpu_do_idle()
>   *


* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-15 17:20 ` [PATCH] Arm64: convert soft_restart() to assembly code Geoff Levand
@ 2014-08-15 18:21   ` Mark Rutland
  2014-08-15 18:53     ` Geoff Levand
                       ` (3 more replies)
  0 siblings, 4 replies; 33+ messages in thread
From: Mark Rutland @ 2014-08-15 18:21 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
> Hi Arun,
> 
> On Tue, 2014-08-12 at 18:12 +0530, Arun Chandran wrote:
> > soft_restart() will fail on arm64 systems that do not
> > guarantee the flushing of the cache to PoC with flush_cache_all().
> > 
> > soft_restart(addr)
> > {
> > 	push_to_stack(addr);
> > 
> > 	Do mm setup for restart;
> > 	Flush&turnoff D-cache;
> > 
> > 	pop_from_stack(addr); --> fails here as addr is not at PoC
> > 	cpu_reset(addr) --> Jumps to invalid address
> > }
> 
> For the cpu-ops shutdown I'm working on I need a call to move the
> secondary processors to an identity mapped spin loop after the identity
> map is enabled.  I want to do this in C code, so it needs to happen
> after the identity map is enabled, and before the dcache is disabled.
> 
> I think to do this we can keep the existing soft_restart(addr) routine
> with something like this:
> 
> void soft_restart(unsigned long addr)
> {
> 	setup_mm_for_reboot();
> 
> #if defined(CONFIG_SMP)
> 	smp_secondary_shutdown();
> #endif
> 
> 	cpu_soft_restart(addr);
> 
> 	/* Should never get here */
> 	BUG();
> }
> 

I don't follow why you need a hook in the middle of soft_restart. That
sounds like a layering violation to me.

I assume this is for implementing the spin-table cpu-return-addr idea?

If so, what's wrong with something like:

#define ADDR_INVALID ((unsigned long)-1)
static DEFINE_PER_CPU(unsigned long, return_addr) = ADDR_INVALID;

int spin_table_cpu_disable(unsigned int cpu)
{
	if (per_cpu(return_addr, cpu) != ADDR_INVALID)
		return 0;
	
	return -EOPNOTSUPP;
}

void spin_table_cpu_die(unsigned int cpu)
{
	unsigned long release_addr = per_cpu(return_addr, cpu);
	
	/*
	 * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
	 * something similar as these are all context synchronising and
	 * therefore expensive.
	 */
	local_dbg_disable();
	local_async_disable();
	local_fiq_disable();
	arch_local_irq_disable();

	soft_restart(release_addr);
}
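
As an aside, the combined local_disable(DBG|ASYNC|FIQ|IRQ) helper mentioned
in the comment above doesn't exist today; a minimal sketch (hypothetical
name, simply setting all four DAIF bits in one instruction) might be:

static inline void local_daif_disable_all(void)
{
	/* Hypothetical helper: mask Debug, SError, IRQ and FIQ in one go */
	asm volatile("msr daifset, #0xf" ::: "memory");
}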

[...]

> > diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
> > index 0c657bb..e18d5d0 100644
> > --- a/arch/arm64/include/asm/proc-fns.h
> > +++ b/arch/arm64/include/asm/proc-fns.h
> > @@ -32,6 +32,7 @@ extern void cpu_cache_off(void);
> >  extern void cpu_do_idle(void);
> >  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
> >  extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
> > +extern void cpu_soft_restart(unsigned long addr) __attribute__((noreturn));
> 
> Function prototypes are never definitions, so remove this 'extern'
> keyword.  checkpatch should have warned about this.  If it did not,
> report it to the checkpatch maintainers.

Good point.

Arun, could you fix up the latest version [1] of your patch to not use
extern for the function declaration?

If you'd be willing to spin a preparatory patch removing the other
externs on function declarations in asm/proc-fns.h, that would be
appreciated. Remember to add a Reported-by for Geoff.

Also, please remember to use a version number in the patch subject (e.g.
"[PATCHv2] arm64: convert part of soft_restart() to assembly"), as that
will make it easier to find the latest version in future.

[...]

> > diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> > index 7736779..a7c3fce 100644
> > --- a/arch/arm64/mm/proc.S
> > +++ b/arch/arm64/mm/proc.S
> > @@ -76,6 +76,40 @@ ENTRY(cpu_reset)
> >  	ret	x0
> >  ENDPROC(cpu_reset)
> >  
> > +	.align 3
> > +1:	.quad	memstart_addr
> > +
> > +ENTRY(cpu_soft_restart)
> > +	adr	x1, cpu_reset
> > +	adr	x2, 1b
> > +
> > +	/* virt_to_phys(cpu_reset) */
> > +	ldr	x3, [x2]
> > +	ldr	x3, [x3]
> > +	mov	x4, #1
> > +	lsl	x4, x4, #(VA_BITS - 1)
> > +	add	x1, x1, x4
> > +	add	x1, x1, x3
> > +
> > +	/* Save it; We can't use stack as it is going to run with caches OFF */
> > +	mov	x19, x0
> > +	mov	x20, x1
> > +
> > +	bl	setup_mm_for_reboot
> > +
> > +	bl	flush_cache_all
> > +	/* Turn D-cache off */
> > +	bl	cpu_cache_off
> > +	/* Push out any further dirty data, and ensure cache is empty */
> > +	bl	flush_cache_all
> 
> It would be nice to have some blank lines above the comments.  Same
> below.

Makes sense to me. For the latest version [1], that should only mean a
line after the call to cpu_cache_off in cpu_soft_restart.

Cheers,
Mark.

[1] lists.infradead.org/pipermail/linux-arm-kernel/2014-August/279390.html


* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-15 18:21   ` Mark Rutland
@ 2014-08-15 18:53     ` Geoff Levand
  2014-08-18 16:02       ` Mark Rutland
  2014-08-18  6:43     ` Arun Chandran
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 33+ messages in thread
From: Geoff Levand @ 2014-08-15 18:53 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Fri, 2014-08-15 at 19:21 +0100, Mark Rutland wrote:
> On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
> > For the cpu-ops shutdown I'm working on I need a call to move the
> > secondary processors to an identity mapped spin loop after the identity
> > map is enabled.  I want to do this in C code, so it needs to happen
> > after the identity map is enabled, and before the dcache is disabled.
> > 
> > I think to do this we can keep the existing soft_restart(addr) routine
> > with something like this:
> > 
> > void soft_restart(unsigned long addr)
> > {
> > 	setup_mm_for_reboot();
> > 
> > #if defined(CONFIG_SMP)
> > 	smp_secondary_shutdown();
> > #endif
> > 
> > 	cpu_soft_restart(addr);
> > 
> > 	/* Should never get here */
> > 	BUG();
> > }
> > 
> 
> I don't follow why you need a hook in the middle of soft_restart. That
> sounds like a layering violation to me.
> 
> I assume this is for implementing the spin-table cpu-return-addr idea?

Yes.

> If so, what's wrong with something like:

> void spin_table_cpu_die(unsigned int cpu)
> {
> 	unsigned long release_addr = per_cpu(return_addr, cpu);
> 	
> 	/*
> 	 * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
> 	 * something similar as these are all context synchronising and
> 	 * therefore expensive.
> 	 */
> 	local_dbg_disable();
> 	local_async_disable();
> 	local_fiq_disable();
> 	arch_local_irq_disable();
> 
> 	soft_restart(release_addr);
> }

OK, this is a much simpler way than what I was thinking, which
was to have the secondaries spin in the kernel until the main
CPU shuts down.  I'll switch over to this, thanks.

-Geoff


* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-15 18:21   ` Mark Rutland
  2014-08-15 18:53     ` Geoff Levand
@ 2014-08-18  6:43     ` Arun Chandran
  2014-08-19  9:04     ` Arun Chandran
  2014-08-20 10:28     ` Arun Chandran
  3 siblings, 0 replies; 33+ messages in thread
From: Arun Chandran @ 2014-08-18  6:43 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff, Mark,

On Fri, Aug 15, 2014 at 11:51 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Geoff,
>
> On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
>> Hi Arun,
>>
>> On Tue, 2014-08-12 at 18:12 +0530, Arun Chandran wrote:
>> > soft_restart() will fail on arm64 systems that do not
>> > guarantee the flushing of the cache to PoC with flush_cache_all().
>> >
>> > soft_restart(addr)
>> > {
>> >     push_to_stack(addr);
>> >
>> >     Do mm setup for restart;
>> >     Flush&turnoff D-cache;
>> >
>> >     pop_from_stack(addr); --> fails here as addr is not at PoC
>> >     cpu_reset(addr) --> Jumps to invalid address
>> > }
>>
>> For the cpu-ops shutdown I'm working on I need a call to move the
>> secondary processors to an identity mapped spin loop after the identity
>> map is enabled.  I want to do this in C code, so it needs to happen
>> after the identity map is enabled, and before the dcache is disabled.
>>
>> I think to do this we can keep the existing soft_restart(addr) routine
>> with something like this:
>>
>> void soft_restart(unsigned long addr)
>> {
>>       setup_mm_for_reboot();
>>
>> #if defined(CONFIG_SMP)
>>       smp_secondary_shutdown();
>> #endif
>>
>>       cpu_soft_restart(addr);
>>
>>       /* Should never get here */
>>       BUG();
>> }
>>
>
> I don't follow why you need a hook in the middle of soft_restart. That
> sounds like a layering violation to me.
>
> I assume this is for implementing the spin-table cpu-return-addr idea?
>
> If so, what's wrong with something like:
>
> #define ADDR_INVALID ((unsigned long)-1)
> static DEFINE_PER_CPU(unsigned long, return_addr) = ADDR_INVALID;
>
> int spin_table_cpu_disable(unsigned int cpu)
> {
>         if (per_cpu(return_addr, cpu) != ADDR_INVALID)
>                 return 0;
>
>         return -EOPNOTSUPP;
> }
>
> void spin_table_cpu_die(unsigned int cpu)
> {
>         unsigned long release_addr = per_cpu(return_addr, cpu);
>
>         /*
>          * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
>          * something similar as these are all context synchronising and
>          * therefore expensive.
>          */
>         local_dbg_disable();
>         local_async_disable();
>         local_fiq_disable();
>         arch_local_irq_disable();
>
>         soft_restart(release_addr);
> }
>
> [...]
>
>> > diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
>> > index 0c657bb..e18d5d0 100644
>> > --- a/arch/arm64/include/asm/proc-fns.h
>> > +++ b/arch/arm64/include/asm/proc-fns.h
>> > @@ -32,6 +32,7 @@ extern void cpu_cache_off(void);
>> >  extern void cpu_do_idle(void);
>> >  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
>> >  extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
>> > +extern void cpu_soft_restart(unsigned long addr) __attribute__((noreturn));
>>
>> Function prototypes are never definitions, so remove this 'extern'
>> keyword.  checkpatch should have warned about this.  If it did not,
>> report it to the checkpatch maintainers.
>
> Good point.
>
> Arun, could you fix up the latest version [1] of your patch to not use
> extern for the function declaration?

Yes, I will fix it, name it as V1, and send it.
>
> If you'd be willing to spin a preparatory patch removing the other
> externs on function declarations in asm/proc-fns.h, that would be
> appreciated. Remember to add a Reported-by for Geoff.
>
Yes I will be happy to do that :)

> Also, please remember to use a version number in the patch subject (e.g.
> "[PATCHv2] arm64: convert part of soft_restart() to assembly"), as that
> will make it easier to find the latest version in future.
>
OK. Done.

> [...]
>
>> > diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> > index 7736779..a7c3fce 100644
>> > --- a/arch/arm64/mm/proc.S
>> > +++ b/arch/arm64/mm/proc.S
>> > @@ -76,6 +76,40 @@ ENTRY(cpu_reset)
>> >     ret     x0
>> >  ENDPROC(cpu_reset)
>> >
>> > +   .align 3
>> > +1: .quad   memstart_addr
>> > +
>> > +ENTRY(cpu_soft_restart)
>> > +   adr     x1, cpu_reset
>> > +   adr     x2, 1b
>> > +
>> > +   /* virt_to_phys(cpu_reset) */
>> > +   ldr     x3, [x2]
>> > +   ldr     x3, [x3]
>> > +   mov     x4, #1
>> > +   lsl     x4, x4, #(VA_BITS - 1)
>> > +   add     x1, x1, x4
>> > +   add     x1, x1, x3
>> > +
>> > +   /* Save it; We can't use stack as it is going to run with caches OFF */
>> > +   mov     x19, x0
>> > +   mov     x20, x1
>> > +
>> > +   bl      setup_mm_for_reboot
>> > +
>> > +   bl      flush_cache_all
>> > +   /* Turn D-cache off */
>> > +   bl      cpu_cache_off
>> > +   /* Push out any further dirty data, and ensure cache is empty */
>> > +   bl      flush_cache_all
>>
>> It would be nice to have some blank lines above the comments.  Same
>> below.
>
> Makes sense to me. For the latest version [1], that should only mean a
> line after the call to cpu_cache_off in cpu_soft_restart.
>

Ok.
--Arun


* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-15 18:53     ` Geoff Levand
@ 2014-08-18 16:02       ` Mark Rutland
  2014-08-18 17:33         ` Christoffer Dall
                           ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Mark Rutland @ 2014-08-18 16:02 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Geoff,

On Fri, Aug 15, 2014 at 07:53:19PM +0100, Geoff Levand wrote:
> Hi Mark,
> 
> On Fri, 2014-08-15 at 19:21 +0100, Mark Rutland wrote:
> > On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
> > > For the cpu-ops shutdown I'm working on I need a call to move the
> > > secondary processors to an identity mapped spin loop after the identity
> > > map is enabled.  I want to do this in C code, so it needs to happen
> > > after the identity map is enabled, and before the dcache is disabled.
> > > 
> > > I think to do this we can keep the existing soft_restart(addr) routine
> > > with something like this:
> > > 
> > > void soft_restart(unsigned long addr)
> > > {
> > > 	setup_mm_for_reboot();
> > > 
> > > #if defined(CONFIG_SMP)
> > > 	smp_secondary_shutdown();
> > > #endif
> > > 
> > > 	cpu_soft_restart(addr);
> > > 
> > > 	/* Should never get here */
> > > 	BUG();
> > > }
> > > 
> > 
> > I don't follow why you need a hook in the middle of soft_restart. That
> > sounds like a layering violation to me.
> > 
> > I assume this is for implementing the spin-table cpu-return-addr idea?
> 
> Yes.
> 
> > If so, what's wrong with something like:
> 
> > void spin_table_cpu_die(unsigned int cpu)
> > {
> > 	unsigned long release_addr = per_cpu(return_addr, cpu);
> > 	
> > 	/*
> > 	 * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
> > 	 * something similar as these are all context synchronising and
> > 	 * therefore expensive.
> > 	 */
> > 	local_dbg_disable();
> > 	local_async_disable();
> > 	local_fiq_disable();
> > 	arch_local_irq_disable();
> > 
> > 	soft_restart(release_addr);
> > }
> 
> OK, this is a much simpler way than what I was thinking, which
> was to have the secondaries spin in the kernel until the main
> cpu shutdown.  I'll switch over to this, thanks.

I just realised that this is still missing the jump to EL2 that I
mentioned a while back.

I think what we need to do is:

* Have KVM (if present) tear itself down prior to cpu_die, restoring
  the __hyp_stub_vectors in VBAR_EL2 and disabling the MMU and caches.
  
* Add a mechanism to __hyp_stub_vectors to allow a hypercall to
  call a function at EL2. We should be able to replace the current
  hyp_stub el1_sync handler with that, and rework KVM to call a function
  at EL2 to set up VBAR_EL2 appropriately at init time (rough sketch below).

* Depending on whether EL2 is available, go via soft_restart or the
  hypercall to cpu_soft_restart (or something very close to it).
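
For the second point, the rough shape I have in mind is something like
the below (a sketch only; __hyp_call and its calling convention are
illustrative, nothing here exists yet):

/*
 * Ask the hyp stub to call a function at EL2: the stub's el1_sync
 * handler would simply branch to the address passed in x0, with x1
 * as an argument, rather than writing x0 straight into VBAR_EL2.
 */
static inline unsigned long __hyp_call(unsigned long fn, unsigned long arg)
{
	register unsigned long x0 asm("x0") = fn;
	register unsigned long x1 asm("x1") = arg;

	asm volatile("hvc	#0"
		     : "+r" (x0)
		     : "r" (x1)
		     : "memory");

	return x0;
}

KVM init would pass the physical address of its EL2 setup routine to
__hyp_call() instead of installing its vectors directly, and the
spin-table/kexec paths could do the same with cpu_soft_restart.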

How does that sound?

Mark.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-18 16:02       ` Mark Rutland
@ 2014-08-18 17:33         ` Christoffer Dall
  2014-08-19  1:10           ` Geoff Levand
  2014-08-20 10:48           ` Mark Rutland
  2014-08-25 11:04         ` Arun Chandran
  2014-08-25 14:14         ` Arun Chandran
  2 siblings, 2 replies; 33+ messages in thread
From: Christoffer Dall @ 2014-08-18 17:33 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Aug 18, 2014 at 6:02 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Geoff,
>
> On Fri, Aug 15, 2014 at 07:53:19PM +0100, Geoff Levand wrote:
>> Hi Mark,
>>
>> On Fri, 2014-08-15 at 19:21 +0100, Mark Rutland wrote:
>> > On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
>> > > For the cpu-ops shutdown I'm working on I need a call to move the
>> > > secondary processors to an identity mapped spin loop after the identity
>> > > map is enabled.  I want to do this in C code, so it needs to happen
>> > > after the identity map is enabled, and before the dcache is disabled.
>> > >
>> > > I think to do this we can keep the existing soft_restart(addr) routine
>> > > with something like this:
>> > >
>> > > void soft_restart(unsigned long addr)
>> > > {
>> > >   setup_mm_for_reboot();
>> > >
>> > > #if defined(CONFIG_SMP)
>> > >   smp_secondary_shutdown();
>> > > #endif
>> > >
>> > >   cpu_soft_restart(addr);
>> > >
>> > >   /* Should never get here */
>> > >   BUG();
>> > > }
>> > >
>> >
>> > I don't follow why you need a hook in the middle of soft_restart. That
>> > sounds like a layering violation to me.
>> >
>> > I assume this is for implementing the spin-table cpu-return-addr idea?
>>
>> Yes.
>>
>> > If so, what's wrong with something like:
>>
>> > void spin_table_cpu_die(unsigned int cpu)
>> > {
>> >     unsigned long release_addr = per_cpu(return_addr, cpu);
>> >
>> >     /*
>> >      * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
>> >      * something similar as these are all context synchronising and
>> >      * therefore expensive.
>> >      */
>> >     local_dbg_disable();
>> >     local_async_disable();
>> >     local_fiq_disable();
>> >     arch_local_irq_disable();
>> >
>> >     soft_restart(release_addr);
>> > }
>>
>> OK, this is a much simpler way than what I was thinking, which
>> was to have the secondaries spin in the kernel until the main
>> cpu shutdown.  I'll switch over to this, thanks.
>
> I just realised that this is still missing the jump to EL2 that I
> mentioned a while back.
>
> I think what we need to do is:
>
> * Have KVM (if present) tears itself down prior to cpu_die, restoring
>   the __hyp_stub_vectors in VBAR_EL2 and disabling the MMU, and caches.
>
> * Add a mechanism to __hyp_stub_vectors to allow a hypercall to
>   call a function at EL2. We should be able to replace the current
>   hyp_stub el1_sync handler with that, and rework KVM to call a function
>   at EL2 to setup VBAR_EL2 appropriately at init time.
>
Why do you need to change the current mechanism?  Is this due to the
CPU being in a different state when restarted with the MMU enabled in
EL2 or something like that?

-Christoffer

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-18 17:33         ` Christoffer Dall
@ 2014-08-19  1:10           ` Geoff Levand
  2014-08-20 10:48           ` Mark Rutland
  1 sibling, 0 replies; 33+ messages in thread
From: Geoff Levand @ 2014-08-19  1:10 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Christoffer,

On Mon, 2014-08-18 at 19:33 +0200, Christoffer Dall wrote:
> On Mon, Aug 18, 2014 at 6:02 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> > I just realised that this is still missing the jump to EL2 that I
> > mentioned a while back.
> >
> > I think what we need to do is:
> >
> > * Have KVM (if present) tears itself down prior to cpu_die, restoring
> >   the __hyp_stub_vectors in VBAR_EL2 and disabling the MMU, and caches.
> >
> > * Add a mechanism to __hyp_stub_vectors to allow a hypercall to
> >   call a function at EL2. We should be able to replace the current
> >   hyp_stub el1_sync handler with that, and rework KVM to call a function
> >   at EL2 to setup VBAR_EL2 appropriately at init time.
> >
> Why do you need to change the current mechanism?  Is this due to the
> CPU being in a different state when restarted with the MMU enabled in
> EL2 or something like that?

I haven't looked into the details of how to do it too closely yet,
but we need to handle the case where a 1st stage kernel that is not
configured with virtualization support needs to kexec re-boot to a
2nd stage kernel that will support virtualization.

Also, the current arm64 bootwrapper program expects to be entered in
EL2 or higher, so a CPU running at EL1 needs to get back to EL2
before re-entering the bootwrapper program on a CPU hot-unplug.

-Geoff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-15 18:21   ` Mark Rutland
  2014-08-15 18:53     ` Geoff Levand
  2014-08-18  6:43     ` Arun Chandran
@ 2014-08-19  9:04     ` Arun Chandran
  2014-08-20 10:28     ` Arun Chandran
  3 siblings, 0 replies; 33+ messages in thread
From: Arun Chandran @ 2014-08-19  9:04 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Aug 15, 2014 at 11:51 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Geoff,
>
> On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
>> Hi Arun,
>>
>> On Tue, 2014-08-12 at 18:12 +0530, Arun Chandran wrote:
>> > soft_restart() will fail on arm64 systems that does not
>> > quarantee the flushing of cache to PoC with flush_cache_all().
>> >
>> > soft_restart(addr)
>> > {
>> >     push_to_stack(addr);
>> >
>> >     Do mm setup for restart;
>> >     Flush&turnoff D-cache;
>> >
>> >     pop_from_stack(addr); --> fails here as addr is not at PoC
>> >     cpu_reset(addr) --> Jumps to invalid address
>> > }
>>
>> For the cpu-ops shutdown I'm working on I need a call to move the
>> secondary processors to an identity mapped spin loop after the identity
>> map is enabled.  I want to do this in C code, so it needs to happen
>> after the identity map is enabled, and before the dcache is disabled.
>>
>> I think to do this we can keep the existing soft_restart(addr) routine
>> with something like this:
>>
>> void soft_restart(unsigned long addr)
>> {
>>       setup_mm_for_reboot();
>>
>> #if defined(CONFIG_SMP)
>>       smp_secondary_shutdown();
>> #endif
>>
>>       cpu_soft_restart(addr);
>>
>>       /* Should never get here */
>>       BUG();
>> }
>>
>
> I don't follow why you need a hook in the middle of soft_restart. That
> sounds like a layering violation to me.
>
> I assume this is for implementing the spin-table cpu-return-addr idea?
>
> If so, what's wrong with something like:
>
> #define ADDR_INVALID ((unsigned long)-1)
> static DEFINE_PER_CPU(unsigned long, return_addr) = ADDR_INVALID;
>
> int spin_table_cpu_disable(unsigned int cpu)
> {
>         if (per_cpu(return_addr, cpu) != ADDR_INVALID)
>                 return 0;
>
>         return -EOPNOTSUPP;
> }
>
> void spin_table_cpu_die(unsigned int cpu)
> {
>         unsigned long release_addr = per_cpu(return_addr, cpu);
>
>         /*
>          * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
>          * something similar as these are all context synchronising and
>          * therefore expensive.
>          */
>         local_dbg_disable();
>         local_async_disable();
>         local_fiq_disable();
>         arch_local_irq_disable();
>
>         soft_restart(release_addr);
> }
>
> [...]
>
>> > diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
>> > index 0c657bb..e18d5d0 100644
>> > --- a/arch/arm64/include/asm/proc-fns.h
>> > +++ b/arch/arm64/include/asm/proc-fns.h
>> > @@ -32,6 +32,7 @@ extern void cpu_cache_off(void);
>> >  extern void cpu_do_idle(void);
>> >  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
>> >  extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
>> > +extern void cpu_soft_restart(unsigned long addr) __attribute__((noreturn));
>>
>> Function prototypes are never definitions, so remove this 'extern'
>> keyword.  checkpatch should have warned about this.  If it did not,
>> report it to the checkpatch maintainers.
>
> Good point.
>
> Arun, could you fix up the latest version [1] of your patch to not use
> extern for the function declaration?
>
> If you'd be willing to spin a preparatory patch removing the other
> externs on function declarations in asm/proc-fns.h, that would be
> appreciated. Remember to add a Reported-by for Geoff.
>

Today I ran "find arch/arm64 -name "*.h" | xargs grep -l "^extern"" on v3.16
and got:

arch/arm64/mm/mm.h
arch/arm64/include/asm/exec.h
arch/arm64/include/asm/cpu_ops.h
arch/arm64/include/asm/suspend.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/include/asm/stackprotector.h
arch/arm64/include/asm/stacktrace.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/cachetype.h
arch/arm64/include/asm/cputable.h
arch/arm64/include/asm/virt.h
arch/arm64/include/asm/efi.h
arch/arm64/include/asm/elf.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/bitops.h
arch/arm64/include/asm/kgdb.h
arch/arm64/include/asm/fixmap.h
arch/arm64/include/asm/uaccess.h
arch/arm64/include/asm/page.h
arch/arm64/include/asm/smp_plat.h
arch/arm64/include/asm/memblock.h
arch/arm64/include/asm/perf_event.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/ptrace.h
arch/arm64/include/asm/smp.h
arch/arm64/include/asm/hw_breakpoint.h
arch/arm64/include/asm/string.h
arch/arm64/include/asm/dma-mapping.h
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/mmu.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/hwcap.h
arch/arm64/include/asm/system_misc.h
arch/arm64/include/asm/signal32.h
arch/arm64/include/asm/hardirq.h
arch/arm64/include/asm/ftrace.h
arch/arm64/include/asm/io.h
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/irq.h
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/syscall.h
arch/arm64/include/asm/topology.h
arch/arm64/include/asm/kvm_asm.h

No idea what to do with the above ones. Anyway, I have sent a patch to
remove it from asm/proc-fns.h.
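
The change itself is just dropping the keyword from each declaration,
e.g. (purely for illustration):

void cpu_do_idle(void);		/* was: extern void cpu_do_idle(void); */
void cpu_reset(unsigned long addr) __attribute__((noreturn));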

--Arun

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-15 18:21   ` Mark Rutland
                       ` (2 preceding siblings ...)
  2014-08-19  9:04     ` Arun Chandran
@ 2014-08-20 10:28     ` Arun Chandran
  2014-08-20 10:54       ` Mark Rutland
  3 siblings, 1 reply; 33+ messages in thread
From: Arun Chandran @ 2014-08-20 10:28 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Fri, Aug 15, 2014 at 11:51 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Geoff,
>
> On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
>> Hi Arun,
>>
>> On Tue, 2014-08-12 at 18:12 +0530, Arun Chandran wrote:
>> > soft_restart() will fail on arm64 systems that does not
>> > quarantee the flushing of cache to PoC with flush_cache_all().
>> >
>> > soft_restart(addr)
>> > {
>> >     push_to_stack(addr);
>> >
>> >     Do mm setup for restart;
>> >     Flush&turnoff D-cache;
>> >
>> >     pop_from_stack(addr); --> fails here as addr is not at PoC
>> >     cpu_reset(addr) --> Jumps to invalid address
>> > }
>>
>> For the cpu-ops shutdown I'm working on I need a call to move the
>> secondary processors to an identity mapped spin loop after the identity
>> map is enabled.  I want to do this in C code, so it needs to happen
>> after the identity map is enabled, and before the dcache is disabled.
>>
>> I think to do this we can keep the existing soft_restart(addr) routine
>> with something like this:
>>
>> void soft_restart(unsigned long addr)
>> {
>>       setup_mm_for_reboot();
>>
>> #if defined(CONFIG_SMP)
>>       smp_secondary_shutdown();
>> #endif
>>
>>       cpu_soft_restart(addr);
>>
>>       /* Should never get here */
>>       BUG();
>> }
>>
>
> I don't follow why you need a hook in the middle of soft_restart. That
> sounds like a layering violation to me.
>
> I assume this is for implementing the spin-table cpu-return-addr idea?
>
> If so, what's wrong with something like:
>
> #define ADDR_INVALID ((unsigned long)-1)
> static DEFINE_PER_CPU(unsigned long, return_addr) = ADDR_INVALID;
>
> int spin_table_cpu_disable(unsigned int cpu)
> {
>         if (per_cpu(return_addr, cpu) != ADDR_INVALID)
>                 return 0;
>
>         return -EOPNOTSUPP;
> }
>
> void spin_table_cpu_die(unsigned int cpu)
> {
>         unsigned long release_addr = per_cpu(return_addr, cpu);
>
>         /*
>          * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
>          * something similar as these are all context synchronising and
>          * therefore expensive.
>          */
>         local_dbg_disable();
>         local_async_disable();
>         local_fiq_disable();
>         arch_local_irq_disable();
>
>         soft_restart(release_addr);
> }
>

I am trying the above method for kexec.

As of now I have hardcoded cpu-return-addr, which is 0x0 in my
case.

##################
diff --git a/arch/arm64/kernel/smp_spin_table.c
b/arch/arm64/kernel/smp_spin_table.c
index e54ceed..e7275b3 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -27,6 +27,7 @@
 #include <asm/cpu_ops.h>
 #include <asm/cputype.h>
 #include <asm/smp_plat.h>
+#include <asm/system_misc.h>

 #include "cpu-properties.h"

@@ -97,7 +98,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
         * boot-loader's endianess before jumping. This is mandated by
         * the boot protocol.
         */
-       writeq_relaxed(__pa(secondary_holding_entry), release_addr);
+       writeq_relaxed(__pa(secondary_holding_pen), release_addr);
        __flush_dcache_area((__force void *)release_addr,
                            sizeof(*release_addr));

@@ -135,18 +136,22 @@ static void smp_spin_table_cpu_die(unsigned int cpu)
 {
        u64 *release_addr = phys_to_virt(cpu_release_addr[cpu]);

-       pr_devel("%s:%d: id: %u, holding count: %lu\n", __func__, __LINE__,
-               smp_processor_id(), secondary_holding_pen_count);
+       pr_devel("%s:%d: id: %u dying\n", __func__, __LINE__,
+               smp_processor_id());

        /* Send cpu back to secondary_holding_pen. */

+       local_dbg_disable();    // FIXME: need this???
+       local_async_disable();  // FIXME: need this???
        local_fiq_disable();    // FIXME: need this???
+       arch_local_irq_disable();

        *release_addr = 0;
+       __flush_dcache_area(release_addr, 8);

        mb();

-       secondary_holding_pen();
+       soft_restart(0);

        BUG();
 }
###########################

I can see that my secondary CPUs are not going to the exact same
state they were in when I boot an SMP kernel from u-boot.

[[They are not waiting for addr(secondary_holding_pen()) at
 release_addr]]

Like Geoff mentioned in another mail, do we need to
get back to EL2 before re-entering the bootwrapper program?

--Arun

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-18 17:33         ` Christoffer Dall
  2014-08-19  1:10           ` Geoff Levand
@ 2014-08-20 10:48           ` Mark Rutland
  2014-08-20 10:54             ` Christoffer Dall
  1 sibling, 1 reply; 33+ messages in thread
From: Mark Rutland @ 2014-08-20 10:48 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Aug 18, 2014 at 06:33:36PM +0100, Christoffer Dall wrote:
> On Mon, Aug 18, 2014 at 6:02 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> > Hi Geoff,
> >
> > On Fri, Aug 15, 2014 at 07:53:19PM +0100, Geoff Levand wrote:
> >> Hi Mark,
> >>
> >> On Fri, 2014-08-15 at 19:21 +0100, Mark Rutland wrote:
> >> > On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
> >> > > For the cpu-ops shutdown I'm working on I need a call to move the
> >> > > secondary processors to an identity mapped spin loop after the identity
> >> > > map is enabled.  I want to do this in C code, so it needs to happen
> >> > > after the identity map is enabled, and before the dcache is disabled.
> >> > >
> >> > > I think to do this we can keep the existing soft_restart(addr) routine
> >> > > with something like this:
> >> > >
> >> > > void soft_restart(unsigned long addr)
> >> > > {
> >> > >   setup_mm_for_reboot();
> >> > >
> >> > > #if defined(CONFIG_SMP)
> >> > >   smp_secondary_shutdown();
> >> > > #endif
> >> > >
> >> > >   cpu_soft_restart(addr);
> >> > >
> >> > >   /* Should never get here */
> >> > >   BUG();
> >> > > }
> >> > >
> >> >
> >> > I don't follow why you need a hook in the middle of soft_restart. That
> >> > sounds like a layering violation to me.
> >> >
> >> > I assume this is for implementing the spin-table cpu-return-addr idea?
> >>
> >> Yes.
> >>
> >> > If so, what's wrong with something like:
> >>
> >> > void spin_table_cpu_die(unsigned int cpu)
> >> > {
> >> >     unsigned long release_addr = per_cpu(return_addr, cpu);
> >> >
> >> >     /*
> >> >      * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
> >> >      * something similar as these are all context synchronising and
> >> >      * therefore expensive.
> >> >      */
> >> >     local_dbg_disable();
> >> >     local_async_disable();
> >> >     local_fiq_disable();
> >> >     arch_local_irq_disable();
> >> >
> >> >     soft_restart(release_addr);
> >> > }
> >>
> >> OK, this is a much simpler way than what I was thinking, which
> >> was to have the secondaries spin in the kernel until the main
> >> cpu shutdown.  I'll switch over to this, thanks.
> >
> > I just realised that this is still missing the jump to EL2 that I
> > mentioned a while back.
> >
> > I think what we need to do is:
> >
> > * Have KVM (if present) tears itself down prior to cpu_die, restoring
> >   the __hyp_stub_vectors in VBAR_EL2 and disabling the MMU, and caches.
> >
> > * Add a mechanism to __hyp_stub_vectors to allow a hypercall to
> >   call a function at EL2. We should be able to replace the current
> >   hyp_stub el1_sync handler with that, and rework KVM to call a function
> >   at EL2 to setup VBAR_EL2 appropriately at init time.
> >
> Why do you need to change the current mechanism?  Is this due to the
> CPU being in a different state when restarted with the MMU enabled in
> EL2 or something like that?

Something like that, yes.

For hotplug with spin-table we need to return CPUs to the spin-table in
the mode they entered to prevent mismatched modes when we throw those
CPUs back into the kernel. For kexec we need to move the final CPU up to
the mode it started in before we branch to the new kernel. If we don't
do that then we either get mismatched modes or lose the use of EL2.

Whatever mechanism we use for this needs to be independent of KVM. 
Ideally this would be in the hyp_stub vectors and we'd have KVM tear
itself down at EL2 and restore the hyp_stub before we offline a CPU.

I'd rather not have a custom set of EL2 vectors that the spin-table code
has to install via the current mechanism, so IMO reworking the hyp stub
to implement a simple function call hypercall would be preferable.  KVM
can use that to set up its vectors, and the spin-table and kexec code
could use it to leave the kernel at EL2.
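
To be concrete, the spin-table side could then end up looking roughly
like the below (a sketch only; hyp_call() is the hypothetical
function-call hypercall, and I'm glossing over exactly how the idmap
setup and cache teardown are split between EL1 and EL2):

void spin_table_cpu_die(unsigned int cpu)
{
	unsigned long release_addr = per_cpu(return_addr, cpu);

	local_dbg_disable();
	local_async_disable();
	local_fiq_disable();
	arch_local_irq_disable();

	if (is_hyp_mode_available())
		/* have the stub run the reset code at EL2, so we leave
		 * the kernel in the mode we entered it */
		hyp_call(virt_to_phys(cpu_soft_restart), release_addr);
	else
		soft_restart(release_addr);

	/* Should never get here */
	BUG();
}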

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-20 10:28     ` Arun Chandran
@ 2014-08-20 10:54       ` Mark Rutland
  2014-08-20 13:57         ` Arun Chandran
  0 siblings, 1 reply; 33+ messages in thread
From: Mark Rutland @ 2014-08-20 10:54 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Aug 20, 2014 at 11:28:52AM +0100, Arun Chandran wrote:
> Hi Mark,

Hi Arun,

> On Fri, Aug 15, 2014 at 11:51 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> > Hi Geoff,
> >
> > On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
> >> Hi Arun,
> >>
> >> On Tue, 2014-08-12 at 18:12 +0530, Arun Chandran wrote:
> >> > soft_restart() will fail on arm64 systems that does not
> >> > quarantee the flushing of cache to PoC with flush_cache_all().
> >> >
> >> > soft_restart(addr)
> >> > {
> >> >     push_to_stack(addr);
> >> >
> >> >     Do mm setup for restart;
> >> >     Flush&turnoff D-cache;
> >> >
> >> >     pop_from_stack(addr); --> fails here as addr is not at PoC
> >> >     cpu_reset(addr) --> Jumps to invalid address
> >> > }
> >>
> >> For the cpu-ops shutdown I'm working on I need a call to move the
> >> secondary processors to an identity mapped spin loop after the identity
> >> map is enabled.  I want to do this in C code, so it needs to happen
> >> after the identity map is enabled, and before the dcache is disabled.
> >>
> >> I think to do this we can keep the existing soft_restart(addr) routine
> >> with something like this:
> >>
> >> void soft_restart(unsigned long addr)
> >> {
> >>       setup_mm_for_reboot();
> >>
> >> #if defined(CONFIG_SMP)
> >>       smp_secondary_shutdown();
> >> #endif
> >>
> >>       cpu_soft_restart(addr);
> >>
> >>       /* Should never get here */
> >>       BUG();
> >> }
> >>
> >
> > I don't follow why you need a hook in the middle of soft_restart. That
> > sounds like a layering violation to me.
> >
> > I assume this is for implementing the spin-table cpu-return-addr idea?
> >
> > If so, what's wrong with something like:
> >
> > #define ADDR_INVALID ((unsigned long)-1)
> > static DEFINE_PER_CPU(unsigned long, return_addr) = ADDR_INVALID;
> >
> > int spin_table_cpu_disable(unsigned int cpu)
> > {
> >         if (per_cpu(return_addr, cpu) != ADDR_INVALID)
> >                 return 0;
> >
> >         return -EOPNOTSUPP;
> > }
> >
> > void spin_table_cpu_die(unsigned int cpu)
> > {
> >         unsigned long release_addr = per_cpu(return_addr, cpu);
> >
> >         /*
> >          * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
> >          * something similar as these are all context synchronising and
> >          * therefore expensive.
> >          */
> >         local_dbg_disable();
> >         local_async_disable();
> >         local_fiq_disable();
> >         arch_local_irq_disable();
> >
> >         soft_restart(release_addr);
> > }
> >
> 
> I am trying the above method for kexec.
> 
> As of now I have hardcoded cpu-return-addr , which is 0x0 in my
> case.
> 
> ##################
> diff --git a/arch/arm64/kernel/smp_spin_table.c
> b/arch/arm64/kernel/smp_spin_table.c
> index e54ceed..e7275b3 100644
> --- a/arch/arm64/kernel/smp_spin_table.c
> +++ b/arch/arm64/kernel/smp_spin_table.c
> @@ -27,6 +27,7 @@
>  #include <asm/cpu_ops.h>
>  #include <asm/cputype.h>
>  #include <asm/smp_plat.h>
> +#include <asm/system_misc.h>
> 
>  #include "cpu-properties.h"
> 
> @@ -97,7 +98,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
>          * boot-loader's endianess before jumping. This is mandated by
>          * the boot protocol.
>          */
> -       writeq_relaxed(__pa(secondary_holding_entry), release_addr);
> +       writeq_relaxed(__pa(secondary_holding_pen), release_addr);
>         __flush_dcache_area((__force void *)release_addr,
>                             sizeof(*release_addr));
> 
> @@ -135,18 +136,22 @@ static void smp_spin_table_cpu_die(unsigned int cpu)
>  {
>         u64 *release_addr = phys_to_virt(cpu_release_addr[cpu]);
> 
> -       pr_devel("%s:%d: id: %u, holding count: %lu\n", __func__, __LINE__,
> -               smp_processor_id(), secondary_holding_pen_count);
> +       pr_devel("%s:%d: id: %u dying\n", __func__, __LINE__,
> +               smp_processor_id());
> 
>         /* Send cpu back to secondary_holding_pen. */
> 
> +       local_dbg_disable();    // FIXME: need this???
> +       local_async_disable();  // FIXME: need this???
>         local_fiq_disable();    // FIXME: need this???
> +       arch_local_irq_disable();
> 
>         *release_addr = 0;
> +       __flush_dcache_area(release_addr, 8);
> 
>         mb();
> 
> -       secondary_holding_pen();
> +       soft_restart(0);
> 
>         BUG();
>  }
> ###########################
> 
> I can see that my secondary cpu's are not going to the exact same
> state as they were in when I boot a SMP kernel from u-boot.
> 
> [[They are not waiting for addr(secondary_holding_pen()) at
>  release_addr]]
> 
> Like geoff mentioned in another mail do we need to
> to get back to EL2 before re-entering the bootwrapper program?

As I mentioned we do need to ensure that the CPUs are in the mode they
started in, though I'm not sure I follow what you mean by "not waiting".
This could be an orthogonal issue.

What exactly do you see, do the CPUs leave the spin-table, are they
taking exceptions, are they getting stuck in the spin-table, etc?

Are the cpu-release-addr entries getting restored to zero before each
CPU begins polling them?

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-20 10:48           ` Mark Rutland
@ 2014-08-20 10:54             ` Christoffer Dall
  2014-08-20 11:21               ` Mark Rutland
  0 siblings, 1 reply; 33+ messages in thread
From: Christoffer Dall @ 2014-08-20 10:54 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Aug 20, 2014 at 12:48 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Mon, Aug 18, 2014 at 06:33:36PM +0100, Christoffer Dall wrote:
>> On Mon, Aug 18, 2014 at 6:02 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> > Hi Geoff,
>> >
>> > On Fri, Aug 15, 2014 at 07:53:19PM +0100, Geoff Levand wrote:
>> >> Hi Mark,
>> >>
>> >> On Fri, 2014-08-15 at 19:21 +0100, Mark Rutland wrote:
>> >> > On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
>> >> > > For the cpu-ops shutdown I'm working on I need a call to move the
>> >> > > secondary processors to an identity mapped spin loop after the identity
>> >> > > map is enabled.  I want to do this in C code, so it needs to happen
>> >> > > after the identity map is enabled, and before the dcache is disabled.
>> >> > >
>> >> > > I think to do this we can keep the existing soft_restart(addr) routine
>> >> > > with something like this:
>> >> > >
>> >> > > void soft_restart(unsigned long addr)
>> >> > > {
>> >> > >   setup_mm_for_reboot();
>> >> > >
>> >> > > #if defined(CONFIG_SMP)
>> >> > >   smp_secondary_shutdown();
>> >> > > #endif
>> >> > >
>> >> > >   cpu_soft_restart(addr);
>> >> > >
>> >> > >   /* Should never get here */
>> >> > >   BUG();
>> >> > > }
>> >> > >
>> >> >
>> >> > I don't follow why you need a hook in the middle of soft_restart. That
>> >> > sounds like a layering violation to me.
>> >> >
>> >> > I assume this is for implementing the spin-table cpu-return-addr idea?
>> >>
>> >> Yes.
>> >>
>> >> > If so, what's wrong with something like:
>> >>
>> >> > void spin_table_cpu_die(unsigned int cpu)
>> >> > {
>> >> >     unsigned long release_addr = per_cpu(return_addr, cpu);
>> >> >
>> >> >     /*
>> >> >      * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
>> >> >      * something similar as these are all context synchronising and
>> >> >      * therefore expensive.
>> >> >      */
>> >> >     local_dbg_disable();
>> >> >     local_async_disable();
>> >> >     local_fiq_disable();
>> >> >     arch_local_irq_disable();
>> >> >
>> >> >     soft_restart(release_addr);
>> >> > }
>> >>
>> >> OK, this is a much simpler way than what I was thinking, which
>> >> was to have the secondaries spin in the kernel until the main
>> >> cpu shutdown.  I'll switch over to this, thanks.
>> >
>> > I just realised that this is still missing the jump to EL2 that I
>> > mentioned a while back.
>> >
>> > I think what we need to do is:
>> >
>> > * Have KVM (if present) tears itself down prior to cpu_die, restoring
>> >   the __hyp_stub_vectors in VBAR_EL2 and disabling the MMU, and caches.
>> >
>> > * Add a mechanism to __hyp_stub_vectors to allow a hypercall to
>> >   call a function at EL2. We should be able to replace the current
>> >   hyp_stub el1_sync handler with that, and rework KVM to call a function
>> >   at EL2 to setup VBAR_EL2 appropriately at init time.
>> >
>> Why do you need to change the current mechanism?  Is this due to the
>> CPU being in a different state when restarted with the MMU enabled in
>> EL2 or something like that?
>
> Something like that, yes.
>
> For hotplug with spin-table we need to return CPUs to the spin-table in
> the mode they entered to prevent mismatched modes when we throw those
> CPUs back into the kernel. For kexec we need to move the final CPU up to
> the mode it started in before we branch to the new kernel. If we don't
> do that then we either get mismatched modes or lose the use of EL2.
>
> Whatever mechanism we use for this needs to be independent of KVM.
> Ideally this would be in the hyp_stub vectors and we'd have KVM tear
> itself down at EL2 and restore the hyp_stub before we offline a CPU.
>
> I'd rather not have a custom set of EL2 vectors that the spin-table code
> has to install via the curent mechanism, so IMO reworking the hyp stub
> to implement a simple function call hypercall would be preferable.  KVM
> can use that to set up its vectors and the spin-table and kexec code
> could use to leave the kernel at EL2.
>
So you'd still always assume the hyp-stub mechanism has the MMU turned
off at EL2, but just make it easier for callers to deal with,
essentially. As far as I can tell, there shouldn't be any problems
converting the hyp-stub API to specify a function to call in EL2
rather than the current method of replacing the vectors.

Letting KVM tear itself down and re-establish the hyp-stub API as it
was at boot time seems completely reasonable to me.

-Christoffer

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-20 10:54             ` Christoffer Dall
@ 2014-08-20 11:21               ` Mark Rutland
  0 siblings, 0 replies; 33+ messages in thread
From: Mark Rutland @ 2014-08-20 11:21 UTC (permalink / raw)
  To: linux-arm-kernel

[...]

> >> > I just realised that this is still missing the jump to EL2 that I
> >> > mentioned a while back.
> >> >
> >> > I think what we need to do is:
> >> >
> >> > * Have KVM (if present) tears itself down prior to cpu_die, restoring
> >> >   the __hyp_stub_vectors in VBAR_EL2 and disabling the MMU, and caches.
> >> >
> >> > * Add a mechanism to __hyp_stub_vectors to allow a hypercall to
> >> >   call a function at EL2. We should be able to replace the current
> >> >   hyp_stub el1_sync handler with that, and rework KVM to call a function
> >> >   at EL2 to setup VBAR_EL2 appropriately at init time.
> >> >
> >> Why do you need to change the current mechanism?  Is this due to the
> >> CPU being in a different state when restarted with the MMU enabled in
> >> EL2 or something like that?
> >
> > Something like that, yes.
> >
> > For hotplug with spin-table we need to return CPUs to the spin-table in
> > the mode they entered to prevent mismatched modes when we throw those
> > CPUs back into the kernel. For kexec we need to move the final CPU up to
> > the mode it started in before we branch to the new kernel. If we don't
> > do that then we either get mismatched modes or lose the use of EL2.
> >
> > Whatever mechanism we use for this needs to be independent of KVM.
> > Ideally this would be in the hyp_stub vectors and we'd have KVM tear
> > itself down at EL2 and restore the hyp_stub before we offline a CPU.
> >
> > I'd rather not have a custom set of EL2 vectors that the spin-table code
> > has to install via the curent mechanism, so IMO reworking the hyp stub
> > to implement a simple function call hypercall would be preferable.  KVM
> > can use that to set up its vectors and the spin-table and kexec code
> > could use to leave the kernel at EL2.
> >
> So you'd still always assume the hyp-stub mechanism has the MMU turned
> off at EL2, but just make it easier for callers to deal with,
> essentially.

Yes. I'd only expect this would be used by a few assembly functions that
would assume a very bare EL2 (only expecting what we initialize in
el2_setup). Having a function call hypercall just makes it easier for
callers and prevents a proliferation of temporary EL2 vectors.

> As far as I can tell, there shouldn't be any problems
> converting the hyp-stub API to specify a function to call in EL2
> rather than the current method of replacing the vectors.
> 
> Letting KVM tear itself down and re-establish the hyp-stub API as it
> was at boot time seems completely reasonable to me.

That's exactly what I was hoping to hear. :)

Now the only thing to figure out is what that tear-down hangs off of.

I thought that would go in kvm_arch_hardware_disable, but it doesn't
look like we use either of kvm_arch_hardware_{enable,disable}. I guess
that would fall in the hyp_init_cpu_notify notifier call.
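
i.e. roughly the below, wherever the teardown ends up living (a sketch
only; cpu_hyp_teardown is a made-up name for whatever restores
__hyp_stub_vectors in VBAR_EL2 and turns the EL2 MMU/caches off, and
the CPU_STARTING leg is only an approximation of what is already there):

static int hyp_init_cpu_notify(struct notifier_block *self,
			       unsigned long action, void *cpu)
{
	switch (action) {
	case CPU_STARTING:
	case CPU_STARTING_FROZEN:
		cpu_init_hyp_mode(NULL);	/* roughly as today */
		break;
	case CPU_DYING:
	case CPU_DYING_FROZEN:
		cpu_hyp_teardown(NULL);		/* new: back to the hyp stub */
		break;
	}

	return NOTIFY_OK;
}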

Is there a reason we have our own notifier rather than using the
kvm_arch_hardware_{enable,disable} calls?

Cheers,
Mark.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-20 10:54       ` Mark Rutland
@ 2014-08-20 13:57         ` Arun Chandran
  2014-08-20 14:16           ` Mark Rutland
  0 siblings, 1 reply; 33+ messages in thread
From: Arun Chandran @ 2014-08-20 13:57 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Aug 20, 2014 at 4:24 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Wed, Aug 20, 2014 at 11:28:52AM +0100, Arun Chandran wrote:
>> Hi Mark,
>
> Hi Arun,
>
>> On Fri, Aug 15, 2014 at 11:51 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> > Hi Geoff,
>> >
>> > On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
>> >> Hi Arun,
>> >>
>> >> On Tue, 2014-08-12 at 18:12 +0530, Arun Chandran wrote:
>> >> > soft_restart() will fail on arm64 systems that does not
>> >> > quarantee the flushing of cache to PoC with flush_cache_all().
>> >> >
>> >> > soft_restart(addr)
>> >> > {
>> >> >     push_to_stack(addr);
>> >> >
>> >> >     Do mm setup for restart;
>> >> >     Flush&turnoff D-cache;
>> >> >
>> >> >     pop_from_stack(addr); --> fails here as addr is not at PoC
>> >> >     cpu_reset(addr) --> Jumps to invalid address
>> >> > }
>> >>
>> >> For the cpu-ops shutdown I'm working on I need a call to move the
>> >> secondary processors to an identity mapped spin loop after the identity
>> >> map is enabled.  I want to do this in C code, so it needs to happen
>> >> after the identity map is enabled, and before the dcache is disabled.
>> >>
>> >> I think to do this we can keep the existing soft_restart(addr) routine
>> >> with something like this:
>> >>
>> >> void soft_restart(unsigned long addr)
>> >> {
>> >>       setup_mm_for_reboot();
>> >>
>> >> #if defined(CONFIG_SMP)
>> >>       smp_secondary_shutdown();
>> >> #endif
>> >>
>> >>       cpu_soft_restart(addr);
>> >>
>> >>       /* Should never get here */
>> >>       BUG();
>> >> }
>> >>
>> >
>> > I don't follow why you need a hook in the middle of soft_restart. That
>> > sounds like a layering violation to me.
>> >
>> > I assume this is for implementing the spin-table cpu-return-addr idea?
>> >
>> > If so, what's wrong with something like:
>> >
>> > #define ADDR_INVALID ((unsigned long)-1)
>> > static DEFINE_PER_CPU(unsigned long, return_addr) = ADDR_INVALID;
>> >
>> > int spin_table_cpu_disable(unsigned int cpu)
>> > {
>> >         if (per_cpu(return_addr, cpu) != ADDR_INVALID)
>> >                 return 0;
>> >
>> >         return -EOPNOTSUPP;
>> > }
>> >
>> > void spin_table_cpu_die(unsigned int cpu)
>> > {
>> >         unsigned long release_addr = per_cpu(return_addr, cpu);
>> >
>> >         /*
>> >          * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
>> >          * something similar as these are all context synchronising and
>> >          * therefore expensive.
>> >          */
>> >         local_dbg_disable();
>> >         local_async_disable();
>> >         local_fiq_disable();
>> >         arch_local_irq_disable();
>> >
>> >         soft_restart(release_addr);
>> > }
>> >
>>
>> I am trying the above method for kexec.
>>
>> As of now I have hardcoded cpu-return-addr , which is 0x0 in my
>> case.
>>
>> ##################
>> diff --git a/arch/arm64/kernel/smp_spin_table.c
>> b/arch/arm64/kernel/smp_spin_table.c
>> index e54ceed..e7275b3 100644
>> --- a/arch/arm64/kernel/smp_spin_table.c
>> +++ b/arch/arm64/kernel/smp_spin_table.c
>> @@ -27,6 +27,7 @@
>>  #include <asm/cpu_ops.h>
>>  #include <asm/cputype.h>
>>  #include <asm/smp_plat.h>
>> +#include <asm/system_misc.h>
>>
>>  #include "cpu-properties.h"
>>
>> @@ -97,7 +98,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
>>          * boot-loader's endianess before jumping. This is mandated by
>>          * the boot protocol.
>>          */
>> -       writeq_relaxed(__pa(secondary_holding_entry), release_addr);
>> +       writeq_relaxed(__pa(secondary_holding_pen), release_addr);
>>         __flush_dcache_area((__force void *)release_addr,
>>                             sizeof(*release_addr));
>>
>> @@ -135,18 +136,22 @@ static void smp_spin_table_cpu_die(unsigned int cpu)
>>  {
>>         u64 *release_addr = phys_to_virt(cpu_release_addr[cpu]);
>>
>> -       pr_devel("%s:%d: id: %u, holding count: %lu\n", __func__, __LINE__,
>> -               smp_processor_id(), secondary_holding_pen_count);
>> +       pr_devel("%s:%d: id: %u dying\n", __func__, __LINE__,
>> +               smp_processor_id());
>>
>>         /* Send cpu back to secondary_holding_pen. */
>>
>> +       local_dbg_disable();    // FIXME: need this???
>> +       local_async_disable();  // FIXME: need this???
>>         local_fiq_disable();    // FIXME: need this???
>> +       arch_local_irq_disable();
>>
>>         *release_addr = 0;
>> +       __flush_dcache_area(release_addr, 8);
>>
>>         mb();
>>
>> -       secondary_holding_pen();
>> +       soft_restart(0);
>>
>>         BUG();
>>  }
>> ###########################
>>
>> I can see that my secondary cpu's are not going to the exact same
>> state as they were in when I boot a SMP kernel from u-boot.
>>
>> [[They are not waiting for addr(secondary_holding_pen()) at
>>  release_addr]]
>>
>> Like geoff mentioned in another mail do we need to
>> to get back to EL2 before re-entering the bootwrapper program?
>

I am trying to kexec a UP-LE kernel from an SMP-LE kernel.

> As I mentioned we do need to ensure that the CPUs are in the mode they
> started in, though I'm not sure I follow what you mean by "not waiting".
> This could be an orthogonal issue.

If I verify the secondary CPUs from u-boot I can see that
they are all looping at

    Core number       : 1
    Core state        : debug (AArch64 EL2)
    Debug entry cause : External Debug Request
    Current PC        : 0x0000000000000238
    Current CPSR      : 0x200003c9 (EL2h)

But after the kexec calls soft_restart(0) for all secondary CPUs
they are looping at

    Core number       : 1
    Core state        : debug (AArch64 EL1)
    Debug entry cause : External Debug Request
    Current PC        : 0xffffffc000083200
    Current CPSR      : 0x600003c5 (EL1h)

This is what I mean by "not waiting": they are no longer polling
the release address for a secondary start-up address to jump to.

>
> What exactly do you see, do the CPUs leave the spin-table, are they
> taking exceptions, are they getting stuck in the spin-table, etc?
>
They are all clearly resetting to address 0 (I put a breakpoint and
verified this), but somehow they end up at 0xffffffc000083200.
I still don't know why.

########
ffffffc00008319c:       d503201f        nop
        ...
ffffffc000083200:       14000260        b       ffffffc000083b80 <el1_sync>
ffffffc000083204:       d503201f        nop
ffffffc000083208:       d503201f        nop
########

> Are the cpu-release-addr entries getting restored to zero before each
> CPU begins polling them?

Yes I can see

CPU#1>md 0x400000fff8
40_0000fff8 : 00000000


--Arun

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-20 13:57         ` Arun Chandran
@ 2014-08-20 14:16           ` Mark Rutland
  2014-08-21 13:34             ` Arun Chandran
  0 siblings, 1 reply; 33+ messages in thread
From: Mark Rutland @ 2014-08-20 14:16 UTC (permalink / raw)
  To: linux-arm-kernel

[...]

> I am trying to kexec a UP-LE kernel from and SMP-LE kernel.
> 
> > As I mentioned we do need to ensure that the CPUs are in the mode they
> > started in, though I'm not sure I follow what you mean by "not waiting".
> > This could be an orthogonal issue.
> 
> If I verify the secondary CPUs from u-boot I can see that
> they are all looping at
> 
>     Core number       : 1
>     Core state        : debug (AArch64 EL2)
>     Debug entry cause : External Debug Request
>     Current PC        : 0x0000000000000238
>     Current CPSR      : 0x200003c9 (EL2h)
> 
> But after the kexec calls soft_restar(0) for all secondary CPUs
> they are looping at
> 
>     Core number       : 1
>     Core state        : debug (AArch64 EL1)
>     Debug entry cause : External Debug Request
>     Current PC        : 0xffffffc000083200
>     Current CPSR      : 0x600003c5 (EL1h)
> 
> This is what I mean by they are not waiting for
> the secondary start-up address to jump.

Ok.

> >
> > What exactly do you see, do the CPUs leave the spin-table, are they
> > taking exceptions, are they getting stuck in the spin-table, etc?
> >
> They all are clearly resetting to address "0"(Put a breakpoint and
> verified) but somehow they end up @0xffffffc000083200.
> I still don't know why.
> 
> ########
> ffffffc00008319c:       d503201f        nop
>         ...
> ffffffc000083200:       14000260        b       ffffffc000083b80 <el1_sync>
> ffffffc000083204:       d503201f        nop
> ffffffc000083208:       d503201f        nop
> ########

That's the EL1 exception vector table.

What looks to be happening is that something causes the CPUs to take an
exception (at EL1). Because translation isn't enabled and the vector
address doesn't map to anything, they'll take some sort of exception.
Because the vectors aren't mapped that will go recursive.

Your spin-table implementation might be poking something that's not
accessible at EL1, in which case you require the jump to EL2 for
correctness. I can't say for certain either way.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-20 14:16           ` Mark Rutland
@ 2014-08-21 13:34             ` Arun Chandran
  2014-08-21 14:31               ` Mark Rutland
  0 siblings, 1 reply; 33+ messages in thread
From: Arun Chandran @ 2014-08-21 13:34 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Wed, Aug 20, 2014 at 7:46 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> [...]
>
>> I am trying to kexec a UP-LE kernel from and SMP-LE kernel.
>>
>> > As I mentioned we do need to ensure that the CPUs are in the mode they
>> > started in, though I'm not sure I follow what you mean by "not waiting".
>> > This could be an orthogonal issue.
>>
>> If I verify the secondary CPUs from u-boot I can see that
>> they are all looping at
>>
>>     Core number       : 1
>>     Core state        : debug (AArch64 EL2)
>>     Debug entry cause : External Debug Request
>>     Current PC        : 0x0000000000000238
>>     Current CPSR      : 0x200003c9 (EL2h)
>>
>> But after the kexec calls soft_restar(0) for all secondary CPUs
>> they are looping at
>>
>>     Core number       : 1
>>     Core state        : debug (AArch64 EL1)
>>     Debug entry cause : External Debug Request
>>     Current PC        : 0xffffffc000083200
>>     Current CPSR      : 0x600003c5 (EL1h)
>>
>> This is what I mean by they are not waiting for
>> the secondary start-up address to jump.
>
> Ok.
>
>> >
>> > What exactly do you see, do the CPUs leave the spin-table, are they
>> > taking exceptions, are they getting stuck in the spin-table, etc?
>> >
>> They all are clearly resetting to address "0"(Put a breakpoint and
>> verified) but somehow they end up @0xffffffc000083200.
>> I still don't know why.
>>
>> ########
>> ffffffc00008319c:       d503201f        nop
>>         ...
>> ffffffc000083200:       14000260        b       ffffffc000083b80 <el1_sync>
>> ffffffc000083204:       d503201f        nop
>> ffffffc000083208:       d503201f        nop
>> ########
>
> That's the EL1 exception vector table.
>
> What looks to be happening is that something causes the CPUs to take an
> exception (at EL1). Because translation isn't enabled and the vector
> address doesn't map to anything, they'll take some sort of exception.
> Because the vectors aren't mapped that will go recursive.
>
> Your spin-table implementation might be poking something that's not
> accessible at EL1, in which case you require the jump to EL2 for
> correctness. I can't say for certain either way.
>

Yes, you are right; I needed a jump to EL2 to put the
secondary CPUs in the correct spinning loop.

I made some progress with the below changes.

###########
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 607d4bb..8969922 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -343,6 +343,11 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )
 // Clear EE and E0E on LE systems
                      PSR_MODE_EL1h)
        msr     spsr_el2, x0
        msr     elr_el2, lr
+       mov     x0, #0x33ff
+       movk    x0, #0x801f, lsl #16
+       msr     cptr_el2, x0                    // Enable traps to el2 when CPACR or CPACR_EL1 is accessed
+       mov     x0, #3 << 20
+       msr     cpacr_el1, x0                   // Enable FP/ASIMD
        mov     w20, #BOOT_CPU_MODE_EL2         // This CPU booted in EL2
        eret
 ENDPROC(el2_setup)
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..d8e806c 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -66,6 +66,14 @@ ENDPROC(el1_sync)

 .macro invalid_vector  label
 \label:
+       mov     x1, #0x33ff
+       msr     cptr_el2, x1                    // Disable copro. traps to EL2
+
+       ic      ialluis
+       isb
+       dsb     sy
+
+       br  x0
        b \label
 ENDPROC(\label)
 .endm
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 64733d4..8ca929c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -65,6 +65,12 @@ void soft_restart(unsigned long addr)
 //     smp_secondary_shutdown();
 #endif

+       /* Delay primary cpu; allow enough time for
+        * secondaries to enter spin loop
+        */
+       if (smp_processor_id() == 0)
+               mdelay(1000);
+
        cpu_soft_restart(virt_to_phys(cpu_reset), addr);

        /* Should never get here */
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 3cb6dec..f6ab468 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -76,6 +76,10 @@ ENTRY(cpu_reset)
 #if defined(CONFIG_SMP)
 /*     bl      secondary_shutdown */
 #endif
+
+       /* Trap to EL2 */
+       mov     x1, #3 << 20
+       msr     cpacr_el1, x1                   // Enable FP/ASIMD
        ret     x0
 ENDPROC(cpu_reset)

@@ -200,8 +204,6 @@ ENTRY(__cpu_setup)
        tlbi    vmalle1is                       // invalidate I + D TLBs
        dsb     ish

-       mov     x0, #3 << 20
-       msr     cpacr_el1, x0                   // Enable FP/ASIMD
        msr     mdscr_el1, xzr                  // Reset mdscr_el1
        /*
         * Memory region attributes for LPAE:
#########

With the above code I was able to manually kexec
an SMP kernel (with the help of the debugger).

Basically the branch instruction (br x0) is not working
in the EL2 vector.
------------------
CPU#0>h
    Core number       : 0
    Core state        : debug (AArch64 EL2)
    Debug entry cause : External Debug Request
    Current PC        : 0x0000004000089800
    Current CPSR      : 0x600003c9 (EL2h)
CPU#0>rd
GPR00: 00000043eb891000 0000000000000018 0000000000000006 0000000000000004
GPR04: 000000000000001f 000000000000001b 0000000000000000 ffffffffffffffff
GPR08: ffffffc3ffe80614 ffffffffffffffff 0000000000000000 0000000000000002
GPR12: ffffffc0000968f0 00000040008c0000 00000043eb949002 ffffffffffffffff
GPR16: ffffffc0000c1244 0000000000435260 0000007ff9c5d490 00000040000968c0
GPR20: 00000043eb891000 ffffffc3eb891000 ffffffc3eb973c00 0000000000000110
GPR24: ffffffc000094000 ffffffc000094cd8 000000000000008e ffffffc00082f000
GPR28: ffffffc3e9888000 ffffffc3e988bd00 ffffffc000095d88 00000043efb24430
CPU#0>go 0x00000043eb891000
--------------------

I had to use the debugger to make CPU0 jump to the kexec boot code.
Similarly, I had to manually put the other CPUs into the spinning code
using the debugger. Could you please shed some light here?

--Arun

> Thanks,
> Mark.
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-21 13:34             ` Arun Chandran
@ 2014-08-21 14:31               ` Mark Rutland
  2014-08-22 11:11                 ` Arun Chandran
  2014-08-26 13:00                 ` Arun Chandran
  0 siblings, 2 replies; 33+ messages in thread
From: Mark Rutland @ 2014-08-21 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 21, 2014 at 02:34:46PM +0100, Arun Chandran wrote:
> Hi Mark,
> 
> On Wed, Aug 20, 2014 at 7:46 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> > [...]
> >
> >> I am trying to kexec a UP-LE kernel from and SMP-LE kernel.
> >>
> >> > As I mentioned we do need to ensure that the CPUs are in the mode they
> >> > started in, though I'm not sure I follow what you mean by "not waiting".
> >> > This could be an orthogonal issue.
> >>
> >> If I verify the secondary CPUs from u-boot I can see that
> >> they are all looping at
> >>
> >>     Core number       : 1
> >>     Core state        : debug (AArch64 EL2)
> >>     Debug entry cause : External Debug Request
> >>     Current PC        : 0x0000000000000238
> >>     Current CPSR      : 0x200003c9 (EL2h)
> >>
> >> But after the kexec calls soft_restar(0) for all secondary CPUs
> >> they are looping at
> >>
> >>     Core number       : 1
> >>     Core state        : debug (AArch64 EL1)
> >>     Debug entry cause : External Debug Request
> >>     Current PC        : 0xffffffc000083200
> >>     Current CPSR      : 0x600003c5 (EL1h)
> >>
> >> This is what I mean by they are not waiting for
> >> the secondary start-up address to jump.
> >
> > Ok.
> >
> >> >
> >> > What exactly do you see, do the CPUs leave the spin-table, are they
> >> > taking exceptions, are they getting stuck in the spin-table, etc?
> >> >
> >> They all are clearly resetting to address "0"(Put a breakpoint and
> >> verified) but somehow they end up @0xffffffc000083200.
> >> I still don't know why.
> >>
> >> ########
> >> ffffffc00008319c:       d503201f        nop
> >>         ...
> >> ffffffc000083200:       14000260        b       ffffffc000083b80 <el1_sync>
> >> ffffffc000083204:       d503201f        nop
> >> ffffffc000083208:       d503201f        nop
> >> ########
> >
> > That's the EL1 exception vector table.
> >
> > What looks to be happening is that something causes the CPUs to take an
> > exception (at EL1). Because translation isn't enabled and the vector
> > address doesn't map to anything, they'll take some sort of exception.
> > Because the vectors aren't mapped that will go recursive.
> >
> > Your spin-table implementation might be poking something that's not
> > accessible at EL1, in which case you require the jump to EL2 for
> > correctness. I can't say for certain either way.
> >
> 
> Yes you are right I needed a jump to EL2 for putting the
> secondary CPUs at correct spinning loop.

Ok. So we need to do what I have suggested elsewhere w.r.t. jumping back
up to EL2. As you point out below, we also need to synchronise with the
CPUs on their way out; we can add a spin-table cpu_kill with a slightly
dodgy delay to ensure that, given we have no other mechanism for
synchronising.
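
Something like the below is all I mean by that (a sketch; the exact
cpu_kill hook signature and return convention aren't important here):

static int smp_spin_table_cpu_kill(unsigned int cpu)
{
	/*
	 * Spin-table gives us no way to observe that the dying CPU has
	 * actually left the kernel, so just give it some time to reach
	 * the spin code before we carry on.
	 */
	mdelay(10);
	return 1;
}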

> I made some progress with the below changes.
> 
> ###########
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 607d4bb..8969922 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -343,6 +343,11 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )
>  // Clear EE and E0E on LE systems
>                       PSR_MODE_EL1h)
>         msr     spsr_el2, x0
>         msr     elr_el2, lr
> +       mov     x0, #0x33ff
> +       movk    x0, #0x801f, lsl #16
> +       msr     cptr_el2, x0                    // Enable traps to el2
> when CPACR or CPACR_EL1 is accessed
> +       mov     x0, #3 << 20
> +       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>         mov     w20, #BOOT_CPU_MODE_EL2         // This CPU booted in EL2
>         eret
>  ENDPROC(el2_setup)
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index a272f33..d8e806c 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -66,6 +66,14 @@ ENDPROC(el1_sync)
> 
>  .macro invalid_vector  label
>  \label:
> +       mov     x1, #0x33ff
> +       msr     cptr_el2, x1                    // Disable copro. traps to EL2
> +
> +       ic      ialluis
> +       isb
> +       dsb     sy

As far as I am aware an "isb; dsb" sequence never makes sense. It should
be "dsb; isb". You want the I-side maintenance to complete (for which
you have the DSB) _then_ you want to synchronise the execution context
with the newly-visible instructions (for which you have the ISB).
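
i.e. something like the below (written as C inline asm for clarity; in
hyp-stub.S it would just be the same three instructions in this order):

	/* invalidate all I-caches to PoU, inner shareable ... */
	asm volatile("ic	ialluis\n\t"
		     "dsb	sy\n\t"	/* ... wait for that to complete ... */
		     "isb"		/* ... then resync this CPU's pipeline */
		     : : : "memory");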

> +
> +       br  x0
>         b \label
>  ENDPROC(\label)
>  .endm
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 64733d4..8ca929c 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -65,6 +65,12 @@ void soft_restart(unsigned long addr)
>  //     smp_secondary_shutdown();
>  #endif
> 
> +       /* Delay primary cpu; allow enough time for
> +        * secondaries to enter spin loop
> +        */
> +       if (smp_processor_id() == 0)
> +               mdelay(1000);
> +
>         cpu_soft_restart(virt_to_phys(cpu_reset), addr);
> 
>         /* Should never get here */
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 3cb6dec..f6ab468 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -76,6 +76,10 @@ ENTRY(cpu_reset)
>  #if defined(CONFIG_SMP)
>  /*     bl      secondary_shutdown */
>  #endif
> +
> +       /* Trap to EL2 */
> +       mov     x1, #3 << 20
> +       msr     cpacr_el1, x1                   // Enable FP/ASIMD
>         ret     x0
>  ENDPROC(cpu_reset)
> 
> @@ -200,8 +204,6 @@ ENTRY(__cpu_setup)
>         tlbi    vmalle1is                       // invalidate I + D TLBs
>         dsb     ish
> 
> -       mov     x0, #3 << 20
> -       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>         msr     mdscr_el1, xzr                  // Reset mdscr_el1
>         /*
>          * Memory region attributes for LPAE:
> #########
> 
> With the above code I was able to manually kexec
> an SMP kernel (With the help of debugger).
> 
> Basically the branch instruction (br x0) is not working
> in the el2 vector.
> ------------------
> CPU#0>h
>     Core number       : 0
>     Core state        : debug (AArch64 EL2)
>     Debug entry cause : External Debug Request
>     Current PC        : 0x0000004000089800

Does this address look similar to the VBAR_EL2, perhaps?

>     Current CPSR      : 0x600003c9 (EL2h)
> CPU#0>rd
> GPR00: 00000043eb891000 0000000000000018 0000000000000006 0000000000000004
> GPR04: 000000000000001f 000000000000001b 0000000000000000 ffffffffffffffff
> GPR08: ffffffc3ffe80614 ffffffffffffffff 0000000000000000 0000000000000002
> GPR12: ffffffc0000968f0 00000040008c0000 00000043eb949002 ffffffffffffffff
> GPR16: ffffffc0000c1244 0000000000435260 0000007ff9c5d490 00000040000968c0
> GPR20: 00000043eb891000 ffffffc3eb891000 ffffffc3eb973c00 0000000000000110
> GPR24: ffffffc000094000 ffffffc000094cd8 000000000000008e ffffffc00082f000
> GPR28: ffffffc3e9888000 ffffffc3e988bd00 ffffffc000095d88 00000043efb24430
> CPU#0>go 0x00000043eb891000
> --------------------
> 
> I had to use the debugger  to make cpu0 jump to kexec-boot code.
> Similarly I had to manually put other CPUs to spinning code
> using debugger. Could you please throw some light here?

I really can't say. I know nothing of your platform or spin-table
implementation, and this is nothing like what I'd expect the jump to EL2
code to look like.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-21 14:31               ` Mark Rutland
@ 2014-08-22 11:11                 ` Arun Chandran
  2014-08-22 13:15                   ` Mark Rutland
  2014-08-26 13:00                 ` Arun Chandran
  1 sibling, 1 reply; 33+ messages in thread
From: Arun Chandran @ 2014-08-22 11:11 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Thu, Aug 21, 2014 at 8:01 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Thu, Aug 21, 2014 at 02:34:46PM +0100, Arun Chandran wrote:
>> Hi Mark,
>>
>> On Wed, Aug 20, 2014 at 7:46 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> > [...]
>> >
>> >> I am trying to kexec a UP-LE kernel from and SMP-LE kernel.
>> >>
>> >> > As I mentioned we do need to ensure that the CPUs are in the mode they
>> >> > started in, though I'm not sure I follow what you mean by "not waiting".
>> >> > This could be an orthogonal issue.
>> >>
>> >> If I verify the secondary CPUs from u-boot I can see that
>> >> they are all looping at
>> >>
>> >>     Core number       : 1
>> >>     Core state        : debug (AArch64 EL2)
>> >>     Debug entry cause : External Debug Request
>> >>     Current PC        : 0x0000000000000238
>> >>     Current CPSR      : 0x200003c9 (EL2h)
>> >>
>> >> But after the kexec calls soft_restar(0) for all secondary CPUs
>> >> they are looping at
>> >>
>> >>     Core number       : 1
>> >>     Core state        : debug (AArch64 EL1)
>> >>     Debug entry cause : External Debug Request
>> >>     Current PC        : 0xffffffc000083200
>> >>     Current CPSR      : 0x600003c5 (EL1h)
>> >>
>> >> This is what I mean by they are not waiting for
>> >> the secondary start-up address to jump.
>> >
>> > Ok.
>> >
>> >> >
>> >> > What exactly do you see, do the CPUs leave the spin-table, are they
>> >> > taking exceptions, are they getting stuck in the spin-table, etc?
>> >> >
>> >> They all are clearly resetting to address "0"(Put a breakpoint and
>> >> verified) but somehow they end up @0xffffffc000083200.
>> >> I still don't know why.
>> >>
>> >> ########
>> >> ffffffc00008319c:       d503201f        nop
>> >>         ...
>> >> ffffffc000083200:       14000260        b       ffffffc000083b80 <el1_sync>
>> >> ffffffc000083204:       d503201f        nop
>> >> ffffffc000083208:       d503201f        nop
>> >> ########
>> >
>> > That's the EL1 exception vector table.
>> >
>> > What looks to be happening is that something causes the CPUs to take an
>> > exception (at EL1). Because translation isn't enabled and the vector
>> > address doesn't map to anything, they'll take some sort of exception.
>> > Because the vectors aren't mapped that will go recursive.
>> >
>> > Your spin-table implementation might be poking something that's not
>> > accessible at EL1, in which case you require the jump to EL2 for
>> > correctness. I can't say for certain either way.
>> >
>>
>> Yes you are right I needed a jump to EL2 for putting the
>> secondary CPUs at correct spinning loop.
>
> Ok. So we need to do what I have suggested elsewhere w.r.t. jumping back
> up to EL2. As you point out below we need to synchronise with the CPUs
> on their way out too we can add a spin-table cpu_kill with a slightly
> dodgy delay to ensure that, given we have no other mechanism for
> synchronising.
>
Ok

>
> As far as I am aware an "isb; dsb" sequence never makes sense. It should
> be "dsb; isb". You want the I-side maintenance to complete (for which
> you have the DSB) _then_ you want to synchronise the execution context
> with the newly-visible instructions (for which you have the ISB).
>
Thanks for the nice explanation.

Now I am able to kexec an SMP kernel with the below change.

##########
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 607d4bb..8969922 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -343,6 +343,11 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )
 // Clear EE and E0E on LE systems
                      PSR_MODE_EL1h)
        msr     spsr_el2, x0
        msr     elr_el2, lr
+       mov     x0, #0x33ff
+       movk    x0, #0x801f, lsl #16
+       msr     cptr_el2, x0                    // Enable traps to el2
when CPACR or CPACR_EL1 is accessed
+       mov     x0, #3 << 20
+       msr     cpacr_el1, x0                   // Enable FP/ASIMD
        mov     w20, #BOOT_CPU_MODE_EL2         // This CPU booted in EL2
        eret
 ENDPROC(el2_setup)
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..1fb4c38 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -56,12 +56,25 @@ el1_sync:
        mrs     x1, esr_el2
        lsr     x1, x1, #26
        cmp     x1, #0x16
-       b.ne    2f                              // Not an HVC trap
+       b.ne    3f                              // Not an HVC trap
        cbz     x0, 1f
        msr     vbar_el2, x0                    // Set vbar_el2
        b       2f
 1:     mrs     x0, vbar_el2                    // Return vbar_el2
 2:     eret
+3:     cmp     x1, #0x18       // Traps to EL2 for CPACR access have this code
+       b.ne    2b
+
+       mov     x1, #0x33ff
+       msr     cptr_el2, x1                    // Disable copro. traps to EL2
+
+       msr     cntp_ctl_el0, xzr
+       msr     cntfrq_el0, xzr
+       ic      ialluis
+       dsb     sy
+       isb
+
+       br  x0
 ENDPROC(el1_sync)

 .macro invalid_vector  label
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 64733d4..77298c2 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -65,6 +65,14 @@ void soft_restart(unsigned long addr)
 //     smp_secondary_shutdown();
 #endif

+       /* Delay primary cpu; allow enough time for
+        * secondaries to enter spin loop
+        */
+#if defined(CONFIG_SMP)
+       if (smp_processor_id() == 0)
+               mdelay(1000);
+#endif
+
        cpu_soft_restart(virt_to_phys(cpu_reset), addr);

        /* Should never get here */
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 3cb6dec..f6ab468 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -76,6 +76,10 @@ ENTRY(cpu_reset)
 #if defined(CONFIG_SMP)
 /*     bl      secondary_shutdown */
 #endif
+
+       /* Trap to EL2 */
+       mov     x1, #3 << 20
+       msr     cpacr_el1, x1                   // Enable FP/ASIMD
        ret     x0
 ENDPROC(cpu_reset)

@@ -200,8 +204,6 @@ ENTRY(__cpu_setup)
        tlbi    vmalle1is                       // invalidate I + D TLBs
        dsb     ish

-       mov     x0, #3 << 20
-       msr     cpacr_el1, x0                   // Enable FP/ASIMD
        msr     mdscr_el1, xzr                  // Reset mdscr_el1
        /*
         * Memory region attributes for LPAE:
##########

The two lines
+       msr     cntp_ctl_el0, xzr
+       msr     cntfrq_el0, xzr

were added for
##########
SANITY CHECK: Unexpected variation in cntfrq. Boot CPU:
0x00000002faf080, CPU1: 0x00000000000000
------------[ cut here ]------------
WARNING: CPU: 1 PID: 0 at arch/arm64/kernel/cpuinfo.c:146
cpuinfo_store_cpu+0x2e8/0x14f4()
Unsupported CPU feature variation.
Modules linked in:
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-rc4+ #196
Call trace:
[<ffffffc000087fd0>] dump_backtrace+0x0/0x130
[<ffffffc000088110>] show_stack+0x10/0x1c
[<ffffffc0004c03e0>] dump_stack+0x74/0xb8
[<ffffffc00009dfac>] warn_slowpath_common+0x8c/0xb4
[<ffffffc00009e078>] warn_slowpath_fmt_taint+0x4c/0x58
[<ffffffc00008adf0>] cpuinfo_store_cpu+0x2e4/0x14f4
[<ffffffc00008f7e0>] secondary_start_kernel+0xd4/0x120
---[ end trace 62c0cf48de286b1c ]---
CPU2: Booted secondary processor
Detected PIPT I-cache on CPU2
SANITY CHECK: Unexpected variation in cntfrq. Boot CPU:
0x00000002faf080, CPU2: 0x00000000000000
CPU3: Booted secondary processor
##########

does this mean we also need to stop/handle timers before
jumping to new kernel??

> I really can't say. I know nothing of your platform or spin-table
> implementation, and this is nothing like what I'd expect the jump to EL2
> code to look like.

Yes, it was just for trying things out. Trapping to EL2 and then jumping
to the kexec/SMP-spin code is not sane. And by the way, I did not find
any way to change mode from EL1 to EL2 other than trapping.

Could you please elaborate on how you expect this to work / be
implemented?

--Arun

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-22 11:11                 ` Arun Chandran
@ 2014-08-22 13:15                   ` Mark Rutland
  2014-08-23 19:50                     ` Arun Chandran
  0 siblings, 1 reply; 33+ messages in thread
From: Mark Rutland @ 2014-08-22 13:15 UTC (permalink / raw)
  To: linux-arm-kernel

[...]

> >> > Your spin-table implementation might be poking something that's not
> >> > accessible at EL1, in which case you require the jump to EL2 for
> >> > correctness. I can't say for certain either way.
> >> >
> >>
> >> Yes you are right I needed a jump to EL2 for putting the
> >> secondary CPUs at correct spinning loop.
> >
> > Ok. So we need to do what I have suggested elsewhere w.r.t. jumping back
> > up to EL2. As you point out below we need to synchronise with the CPUs
> > on their way out too we can add a spin-table cpu_kill with a slightly
> > dodgy delay to ensure that, given we have no other mechanism for
> > synchronising.
> >
> Ok
> 
> >
> > As far as I am aware an "isb; dsb" sequence never makes sense. It should
> > be "dsb; isb". You want the I-side maintenance to complete (for which
> > you have the DSB) _then_ you want to synchronise the execution context
> > with the newly-visible instructions (for which you have the ISB).
> >
> Thanks for the nice explanation.
> 
> Now I am able to kexec an SMP kernel with the below change.
> 
> ##########
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 607d4bb..8969922 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -343,6 +343,11 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )
>  // Clear EE and E0E on LE systems
>                       PSR_MODE_EL1h)
>         msr     spsr_el2, x0
>         msr     elr_el2, lr
> +       mov     x0, #0x33ff
> +       movk    x0, #0x801f, lsl #16
> +       msr     cptr_el2, x0                    // Enable traps to el2
> when CPACR or CPACR_EL1 is accessed
> +       mov     x0, #3 << 20
> +       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>         mov     w20, #BOOT_CPU_MODE_EL2         // This CPU booted in EL2
>         eret
>  ENDPROC(el2_setup)
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index a272f33..1fb4c38 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -56,12 +56,25 @@ el1_sync:
>         mrs     x1, esr_el2
>         lsr     x1, x1, #26
>         cmp     x1, #0x16
> -       b.ne    2f                              // Not an HVC trap
> +       b.ne    3f                              // Not an HVC trap
>         cbz     x0, 1f
>         msr     vbar_el2, x0                    // Set vbar_el2
>         b       2f
>  1:     mrs     x0, vbar_el2                    // Return vbar_el2
>  2:     eret
> +3:     cmp     x1, #0x18       // Traps to EL2 for CPACR access have this code
> +       b.ne    2b
> +
> +       mov     x1, #0x33ff
> +       msr     cptr_el2, x1                    // Disable copro. traps to EL2
> +
> +       msr     cntp_ctl_el0, xzr
> +       msr     cntfrq_el0, xzr
> +       ic      ialluis
> +       dsb     sy
> +       isb
> +
> +       br  x0
>  ENDPROC(el1_sync)
> 
>  .macro invalid_vector  label
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 64733d4..77298c2 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -65,6 +65,14 @@ void soft_restart(unsigned long addr)
>  //     smp_secondary_shutdown();
>  #endif
> 
> +       /* Delay primary cpu; allow enough time for
> +        * secondaries to enter spin loop
> +        */
> +#if defined(CONFIG_SMP)
> +       if (smp_processor_id() == 0)
> +               mdelay(1000);
> +#endif
> +
>         cpu_soft_restart(virt_to_phys(cpu_reset), addr);
> 
>         /* Should never get here */
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 3cb6dec..f6ab468 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -76,6 +76,10 @@ ENTRY(cpu_reset)
>  #if defined(CONFIG_SMP)
>  /*     bl      secondary_shutdown */
>  #endif
> +
> +       /* Trap to EL2 */
> +       mov     x1, #3 << 20
> +       msr     cpacr_el1, x1                   // Enable FP/ASIMD
>         ret     x0
>  ENDPROC(cpu_reset)
> 
> @@ -200,8 +204,6 @@ ENTRY(__cpu_setup)
>         tlbi    vmalle1is                       // invalidate I + D TLBs
>         dsb     ish
> 
> -       mov     x0, #3 << 20
> -       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>         msr     mdscr_el1, xzr                  // Reset mdscr_el1
>         /*
>          * Memory region attributes for LPAE:
> ##########
> 
> The two lines
> +       msr     cntp_ctl_el0, xzr
> +       msr     cntfrq_el0, xzr

The kernel should _NEVER_ write to CNTFRQ. It is not guaranteed to be
accessible from the non-secure side. The firmware should have configured
this appropriately.

If the firmware does not configure CNTFRQ correctly then it is buggy and
must be fixed.

Overriding the CNTFRQ to zero is completely broken.
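
For reference, the check that fires boils down to a per-CPU comparison
along these lines (a heavily simplified sketch; the real code lives in
cpuinfo_store_cpu() and covers far more registers):

#include <linux/printk.h>
#include <linux/types.h>

static u64 boot_cpu_cntfrq;

static void sanity_check_cntfrq(unsigned int cpu)
{
        u64 cntfrq;

        asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));

        if (cpu == 0)
                boot_cpu_cntfrq = cntfrq;       /* recorded by the boot CPU */
        else if (cntfrq != boot_cpu_cntfrq)
                pr_warn("Unexpected variation in cntfrq. Boot CPU: %#llx, CPU%u: %#llx\n",
                        boot_cpu_cntfrq, cpu, cntfrq);
}

A CPU whose cntfrq reads back as zero will always trip this.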

> were added for
> ##########
> SANITY CHECK: Unexpected variation in cntfrq. Boot CPU:
> 0x00000002faf080, CPU1: 0x00000000000000
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 0 at arch/arm64/kernel/cpuinfo.c:146
> cpuinfo_store_cpu+0x2e8/0x14f4()
> Unsupported CPU feature variation.
> Modules linked in:
> CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-rc4+ #196
> Call trace:
> [<ffffffc000087fd0>] dump_backtrace+0x0/0x130
> [<ffffffc000088110>] show_stack+0x10/0x1c
> [<ffffffc0004c03e0>] dump_stack+0x74/0xb8
> [<ffffffc00009dfac>] warn_slowpath_common+0x8c/0xb4
> [<ffffffc00009e078>] warn_slowpath_fmt_taint+0x4c/0x58
> [<ffffffc00008adf0>] cpuinfo_store_cpu+0x2e4/0x14f4
> [<ffffffc00008f7e0>] secondary_start_kernel+0xd4/0x120
> ---[ end trace 62c0cf48de286b1c ]---
> CPU2: Booted secondary processor
> Detected PIPT I-cache on CPU2
> SANITY CHECK: Unexpected variation in cntfrq. Boot CPU:
> 0x00000002faf080, CPU2: 0x00000000000000
> CPU3: Booted secondary processor
> ##########
> 
> does this mean we also need to stop/handle timers before
> jumping to new kernel??

The value of CNTFRQ has nothing to do with whether the timers are
enabled. If we don't already disable the timers, I guess we could do so
as a courtesy to the next kernel, but interrupts should be masked and
the next kernel should be able to handle it anyway. That would all live
in the timer driver anyhow.

Do you see the sanity check failure on a cold boot (with the same
kernel)?

 - If so, your firmware is not configuring CNTFRQ appropriately as it
   MUST do. Virtualization IS broken on your system. This is a bug in
   the firmware.

 - If not, then something in the spin-table's cpu return path is
   resulting in CPUs being reconfigured, or reset and not configured as
   with cold boot. This is a bug in the firmware.

The firmware MUST configure CNTFRQ. It must do so regardless of
cold/warm boot. A clock-frequency property in the DT is insufficient;
it's a half-baked workaround that's especially problematic if you boot
at EL2.
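
For illustration only -- firmware is obviously not Linux C, but per CPU
it needs to do the equivalent of the below at the highest implemented
exception level (0x2faf080 being the 50 MHz your boot CPU reports):

static inline void firmware_init_cntfrq(void)
{
        unsigned long freq = 50000000UL;        /* platform timer rate: 50 MHz */

        /* CNTFRQ_EL0 is only writable from the highest implemented EL */
        asm volatile("msr cntfrq_el0, %0" : : "r" (freq));
}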

> > I really can't say. I know nothing of your platform or spin-table
> > implementation, and this is nothing like what I'd expect the jump to EL2
> > code to look like.
> 
> Yes It was just for trying things out. Traping to EL2 and then jumping
> to the kexec/SMP-spin code is not sane. And by the way
> I did not find anyway to do mode change to el2 from el1 other than
> trapping.
> 
> Could you please elaborate more about the way you expect
> it to be working/implemented??.

Elsewhere in this thread I gave an outline of how I would expect this to
be implemented with modifications to KVM and the hyp stub.

Obviously some trap will be taken to EL2, but that should be as a result
of an explicit HVC rather than an access to the CPACR, and it should
only occur when the kernel was booted at EL2.
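
So the caller would end up looking roughly like the below;
is_hyp_mode_available() and soft_restart() exist today, the other names
are made up for illustration:

#include <asm/virt.h>

void restart_at_boot_el(unsigned long entry)    /* illustrative name */
{
        if (is_hyp_mode_available())
                /*
                 * Booted at EL2: HVC into the hyp stub, which turns the
                 * MMU/caches off at EL2 and branches to 'entry' there.
                 */
                hyp_soft_restart(entry);        /* hypothetical helper */
        else
                soft_restart(entry);
}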

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-22 13:15                   ` Mark Rutland
@ 2014-08-23 19:50                     ` Arun Chandran
  0 siblings, 0 replies; 33+ messages in thread
From: Arun Chandran @ 2014-08-23 19:50 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Fri, Aug 22, 2014 at 6:45 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> [...]
>
>> >> > Your spin-table implementation might be poking something that's not
>> >> > accessible at EL1, in which case you require the jump to EL2 for
>> >> > correctness. I can't say for certain either way.
>> >> >
>> >>
>> >> Yes you are right I needed a jump to EL2 for putting the
>> >> secondary CPUs at correct spinning loop.
>> >
>> > Ok. So we need to do what I have suggested elsewhere w.r.t. jumping back
>> > up to EL2. As you point out below we need to synchronise with the CPUs
>> > on their way out too we can add a spin-table cpu_kill with a slightly
>> > dodgy delay to ensure that, given we have no other mechanism for
>> > synchronising.
>> >
>> Ok
>>
>> >
>> > As far as I am aware an "isb; dsb" sequence never makes sense. It should
>> > be "dsb; isb". You want the I-side maintenance to complete (for which
>> > you have the DSB) _then_ you want to synchronise the execution context
>> > with the newly-visible instructions (for which you have the ISB).
>> >
>> Thanks for the nice explanation.
>>
>> Now I am able to kexec an SMP kernel with the below change.
>>
>> ##########
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 607d4bb..8969922 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -343,6 +343,11 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )
>>  // Clear EE and E0E on LE systems
>>                       PSR_MODE_EL1h)
>>         msr     spsr_el2, x0
>>         msr     elr_el2, lr
>> +       mov     x0, #0x33ff
>> +       movk    x0, #0x801f, lsl #16
>> +       msr     cptr_el2, x0                    // Enable traps to el2
>> when CPACR or CPACR_EL1 is accessed
>> +       mov     x0, #3 << 20
>> +       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>>         mov     w20, #BOOT_CPU_MODE_EL2         // This CPU booted in EL2
>>         eret
>>  ENDPROC(el2_setup)
>> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
>> index a272f33..1fb4c38 100644
>> --- a/arch/arm64/kernel/hyp-stub.S
>> +++ b/arch/arm64/kernel/hyp-stub.S
>> @@ -56,12 +56,25 @@ el1_sync:
>>         mrs     x1, esr_el2
>>         lsr     x1, x1, #26
>>         cmp     x1, #0x16
>> -       b.ne    2f                              // Not an HVC trap
>> +       b.ne    3f                              // Not an HVC trap
>>         cbz     x0, 1f
>>         msr     vbar_el2, x0                    // Set vbar_el2
>>         b       2f
>>  1:     mrs     x0, vbar_el2                    // Return vbar_el2
>>  2:     eret
>> +3:     cmp     x1, #0x18       // Traps to EL2 for CPACR access have this code
>> +       b.ne    2b
>> +
>> +       mov     x1, #0x33ff
>> +       msr     cptr_el2, x1                    // Disable copro. traps to EL2
>> +
>> +       msr     cntp_ctl_el0, xzr
>> +       msr     cntfrq_el0, xzr
>> +       ic      ialluis
>> +       dsb     sy
>> +       isb
>> +
>> +       br  x0
>>  ENDPROC(el1_sync)
>>
>>  .macro invalid_vector  label
>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index 64733d4..77298c2 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -65,6 +65,14 @@ void soft_restart(unsigned long addr)
>>  //     smp_secondary_shutdown();
>>  #endif
>>
>> +       /* Delay primary cpu; allow enough time for
>> +        * secondaries to enter spin loop
>> +        */
>> +#if defined(CONFIG_SMP)
>> +       if (smp_processor_id() == 0)
>> +               mdelay(1000);
>> +#endif
>> +
>>         cpu_soft_restart(virt_to_phys(cpu_reset), addr);
>>
>>         /* Should never get here */
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 3cb6dec..f6ab468 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -76,6 +76,10 @@ ENTRY(cpu_reset)
>>  #if defined(CONFIG_SMP)
>>  /*     bl      secondary_shutdown */
>>  #endif
>> +
>> +       /* Trap to EL2 */
>> +       mov     x1, #3 << 20
>> +       msr     cpacr_el1, x1                   // Enable FP/ASIMD
>>         ret     x0
>>  ENDPROC(cpu_reset)
>>
>> @@ -200,8 +204,6 @@ ENTRY(__cpu_setup)
>>         tlbi    vmalle1is                       // invalidate I + D TLBs
>>         dsb     ish
>>
>> -       mov     x0, #3 << 20
>> -       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>>         msr     mdscr_el1, xzr                  // Reset mdscr_el1
>>         /*
>>          * Memory region attributes for LPAE:
>> ##########
>>
>> The two lines
>> +       msr     cntp_ctl_el0, xzr
>> +       msr     cntfrq_el0, xzr
>
> The kernel should _NEVER_ write to CNTFRQ. It is not guaranteed to be
> accessible from the non-secure side. The firmware should have configured
> this appropriately.
>
> If the firmware does not configure CNTFRQ correctly then it is buggy and
> must be fixed.
>
> Overriding the CNTFRQ to zero is completely broken.
>
>> were added for
>> ##########
>> SANITY CHECK: Unexpected variation in cntfrq. Boot CPU:
>> 0x00000002faf080, CPU1: 0x00000000000000
>> ------------[ cut here ]------------
>> WARNING: CPU: 1 PID: 0 at arch/arm64/kernel/cpuinfo.c:146
>> cpuinfo_store_cpu+0x2e8/0x14f4()
>> Unsupported CPU feature variation.
>> Modules linked in:
>> CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-rc4+ #196
>> Call trace:
>> [<ffffffc000087fd0>] dump_backtrace+0x0/0x130
>> [<ffffffc000088110>] show_stack+0x10/0x1c
>> [<ffffffc0004c03e0>] dump_stack+0x74/0xb8
>> [<ffffffc00009dfac>] warn_slowpath_common+0x8c/0xb4
>> [<ffffffc00009e078>] warn_slowpath_fmt_taint+0x4c/0x58
>> [<ffffffc00008adf0>] cpuinfo_store_cpu+0x2e4/0x14f4
>> [<ffffffc00008f7e0>] secondary_start_kernel+0xd4/0x120
>> ---[ end trace 62c0cf48de286b1c ]---
>> CPU2: Booted secondary processor
>> Detected PIPT I-cache on CPU2
>> SANITY CHECK: Unexpected variation in cntfrq. Boot CPU:
>> 0x00000002faf080, CPU2: 0x00000000000000
>> CPU3: Booted secondary processor
>> ##########
>>
>> does this mean we also need to stop/handle timers before
>> jumping to new kernel??
>
> The value of CNTFRQ has nothing to do with whether the timers are
> enabled. If we don't already disabling the timers I guess we could as a
> courtesy to the next kernel, but interrupts should be masked and the
> next kernel should be able to handle it anyway. That would all live in
> the timer driver anyhow.
>
> Do you see the sanity check failure on a cold boot (with the same
> kernel)?
>

No I don't see the failure on cold boot.

>  - If so, your firmware is not configuring CNTFRQ appropriately as it
>    MUST do. Virtualization IS broken on your system. This is a bug in
>    the firmware.
>
>  - If not, then something in the spin-table's cpu return path is
>    resulting in CPUs being reconfigured, or reset and not configured as
>    with cold boot. This is a bug in the firmware.

I will report this to my hardware guys.
>
> The firmware MUST configure CNTFRQ. It must do so regardless of
> cold/warm boot. A clock-frequency property in the DT is insufficient;
> it's a half-baked workaround that's especially problematic if you boot
> at EL2.
>
>> > I really can't say. I know nothing of your platform or spin-table
>> > implementation, and this is nothing like what I'd expect the jump to EL2
>> > code to look like.
>>
>> Yes It was just for trying things out. Traping to EL2 and then jumping
>> to the kexec/SMP-spin code is not sane. And by the way
>> I did not find anyway to do mode change to el2 from el1 other than
>> trapping.
>>
>> Could you please elaborate more about the way you expect
>> it to be working/implemented??.
>
> Elsewhere in this thread I gave an outline of how I would expect this to
> be implemented with modifications to KVM and the hyp stub.
>
OK. I will give it a try.

> Obviously some trap will be taken to EL2, but that should be as a result
> of an explicit HVC rather than an access to the CPACR, and it should
> only occur when the kernel was booted at EL2.
>
Ok. Thanks for the nice explanation.

--Arun

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-18 16:02       ` Mark Rutland
  2014-08-18 17:33         ` Christoffer Dall
@ 2014-08-25 11:04         ` Arun Chandran
  2014-08-25 14:14         ` Arun Chandran
  2 siblings, 0 replies; 33+ messages in thread
From: Arun Chandran @ 2014-08-25 11:04 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark

On Mon, Aug 18, 2014 at 9:32 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Geoff,
>
> On Fri, Aug 15, 2014 at 07:53:19PM +0100, Geoff Levand wrote:
>> Hi Mark,
>>
>> On Fri, 2014-08-15 at 19:21 +0100, Mark Rutland wrote:
>> > On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
>> > > For the cpu-ops shutdown I'm working on I need a call to move the
>> > > secondary processors to an identity mapped spin loop after the identity
>> > > map is enabled.  I want to do this in C code, so it needs to happen
>> > > after the identity map is enabled, and before the dcache is disabled.
>> > >
>> > > I think to do this we can keep the existing soft_restart(addr) routine
>> > > with something like this:
>> > >
>> > > void soft_restart(unsigned long addr)
>> > > {
>> > >   setup_mm_for_reboot();
>> > >
>> > > #if defined(CONFIG_SMP)
>> > >   smp_secondary_shutdown();
>> > > #endif
>> > >
>> > >   cpu_soft_restart(addr);
>> > >
>> > >   /* Should never get here */
>> > >   BUG();
>> > > }
>> > >
>> >
>> > I don't follow why you need a hook in the middle of soft_restart. That
>> > sounds like a layering violation to me.
>> >
>> > I assume this is for implementing the spin-table cpu-return-addr idea?
>>
>> Yes.
>>
>> > If so, what's wrong with something like:
>>
>> > void spin_table_cpu_die(unsigned int cpu)
>> > {
>> >     unsigned long release_addr = per_cpu(return_addr, cpu);
>> >
>> >     /*
>> >      * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
>> >      * something similar as these are all context synchronising and
>> >      * therefore expensive.
>> >      */
>> >     local_dbg_disable();
>> >     local_async_disable();
>> >     local_fiq_disable();
>> >     arch_local_irq_disable();
>> >
>> >     soft_restart(release_addr);
>> > }
>>
>> OK, this is a much simpler way than what I was thinking, which
>> was to have the secondaries spin in the kernel until the main
>> cpu shutdown.  I'll switch over to this, thanks.
>
> I just realised that this is still missing the jump to EL2 that I
> mentioned a while back.
>
> I think what we need to do is:
>
> * Have KVM (if present) tears itself down prior to cpu_die, restoring
>   the __hyp_stub_vectors in VBAR_EL2 and disabling the MMU, and caches.
>
> * Add a mechanism to __hyp_stub_vectors to allow a hypercall to
>   call a function at EL2. We should be able to replace the current
>   hyp_stub el1_sync handler with that, and rework KVM to call a function
>   at EL2 to setup VBAR_EL2 appropriately at init time.
>
> * Depending on whether EL2 is available, go via soft_restart or the
>   hypercall to cpu_soft_restart (or something very close to it).
>
> How does that sound?

Hi Mark,

What about the implementation below?

##############
diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
index 4371f45..799ca8a 100644
--- a/arch/arm/include/asm/virt.h
+++ b/arch/arm/include/asm/virt.h
@@ -52,7 +52,7 @@ static inline void sync_boot_mode(void)
        sync_cache_r(&__boot_cpu_mode);
 }

-void __hyp_set_vectors(unsigned long phys_vector_base);
+void __hyp_func_call(unsigned long addr);
 unsigned long __hyp_get_vectors(void);
 #else
 #define __boot_cpu_mode        (SVC_MODE)
diff --git a/arch/arm64/include/asm/proc-fns.h
b/arch/arm64/include/asm/proc-fns.h
index ddbc3f5..40d3360 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -31,7 +31,7 @@ struct cpu_suspend_ctx;
 extern void cpu_cache_off(void);
 extern void cpu_do_idle(void);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
-extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+extern void cpu_reset(unsigned long addr, unsigned long boot_mode)
__attribute__((noreturn));
 extern void cpu_soft_restart(phys_addr_t cpu_reset, unsigned long
addr) __attribute__((noreturn));
 extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
 extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..4dd86b4 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -54,13 +54,20 @@ ENDPROC(__hyp_stub_vectors)

 el1_sync:
        mrs     x1, esr_el2
-       lsr     x1, x1, #26
-       cmp     x1, #0x16
+       lsr     x2, x1, #26
+       cmp     x2, #0x16
        b.ne    2f                              // Not an HVC trap
-       cbz     x0, 1f
-       msr     vbar_el2, x0                    // Set vbar_el2
+
+       and     x1, x1, #1                      // x1=1 -> Func call;
x1=0 -> get_vectors
+       cbnz    x1, 1f
+       mrs     x0, vbar_el2                    // Return vbar_el2
        b       2f
-1:     mrs     x0, vbar_el2                    // Return vbar_el2
+
+       /* Flush I-cache before calling function @x0 */
+1:     ic      ialluis
+       dsb     sy
+       isb
+       blr     x0
 2:     eret
 ENDPROC(el1_sync)

@@ -101,10 +108,12 @@ ENDPROC(\label)
  */

 ENTRY(__hyp_get_vectors)
-       mov     x0, xzr
-       // fall through
-ENTRY(__hyp_set_vectors)
        hvc     #0
        ret
 ENDPROC(__hyp_get_vectors)
-ENDPROC(__hyp_set_vectors)
+
+/* Call a function @x0 */
+ENTRY(__hyp_func_call)
+       hvc     #1
+       ret
+ENDPROC(__hyp_func_call)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 64733d4..77298c2 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -65,6 +65,14 @@ void soft_restart(unsigned long addr)
 //     smp_secondary_shutdown();
 #endif

+       /* Delay primary cpu; allow enough time for
+        * secondaries to enter spin loop
+        */
+#if defined(CONFIG_SMP)
+       if (smp_processor_id() == 0)
+               mdelay(1000);
+#endif
+
        cpu_soft_restart(virt_to_phys(cpu_reset), addr);

        /* Should never get here */

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 3cb6dec..74e11c2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -25,6 +25,7 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/virt.h>

 #include "proc-macros.S"

@@ -69,19 +70,25 @@ ENDPROC(cpu_cache_off)
  */
        .align  5
 ENTRY(cpu_reset)
-       mrs     x1, sctlr_el1
-       bic     x1, x1, #1
-       msr     sctlr_el1, x1                   // disable the MMU
+       mrs     x2, sctlr_el1
+       bic     x2, x2, #1
+       msr     sctlr_el1, x2                   // disable the MMU
        isb
 #if defined(CONFIG_SMP)
 /*     bl      secondary_shutdown */
 #endif
+       sub     w1, w1, #BOOT_CPU_MODE_EL2
+       cbz     w1, __hyp_func_call
        ret     x0
 ENDPROC(cpu_reset)

 ENTRY(cpu_soft_restart)
+       ldr     x3, =__boot_cpu_mode
+       ldr     w2, [x3]
+
        mov     x19, x0
        mov     x20, x1
+       mov     w21, w2

        /* Turn D-cache off */
        bl      cpu_cache_off
@@ -89,6 +96,7 @@ ENTRY(cpu_soft_restart)
        bl      flush_cache_all

        mov     x0, x20
+       mov     w1, w21
        ret     x19
 ENDPROC(cpu_soft_restart)

###################

Am I anywhere close to your idea?

--Arun

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-18 16:02       ` Mark Rutland
  2014-08-18 17:33         ` Christoffer Dall
  2014-08-25 11:04         ` Arun Chandran
@ 2014-08-25 14:14         ` Arun Chandran
  2014-08-26 15:22           ` Mark Rutland
  2 siblings, 1 reply; 33+ messages in thread
From: Arun Chandran @ 2014-08-25 14:14 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Aug 18, 2014 at 9:32 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Geoff,
>
> On Fri, Aug 15, 2014 at 07:53:19PM +0100, Geoff Levand wrote:
>> Hi Mark,
>>
>> On Fri, 2014-08-15 at 19:21 +0100, Mark Rutland wrote:
>> > On Fri, Aug 15, 2014 at 06:20:21PM +0100, Geoff Levand wrote:
>> > > For the cpu-ops shutdown I'm working on I need a call to move the
>> > > secondary processors to an identity mapped spin loop after the identity
>> > > map is enabled.  I want to do this in C code, so it needs to happen
>> > > after the identity map is enabled, and before the dcache is disabled.
>> > >
>> > > I think to do this we can keep the existing soft_restart(addr) routine
>> > > with something like this:
>> > >
>> > > void soft_restart(unsigned long addr)
>> > > {
>> > >   setup_mm_for_reboot();
>> > >
>> > > #if defined(CONFIG_SMP)
>> > >   smp_secondary_shutdown();
>> > > #endif
>> > >
>> > >   cpu_soft_restart(addr);
>> > >
>> > >   /* Should never get here */
>> > >   BUG();
>> > > }
>> > >
>> >
>> > I don't follow why you need a hook in the middle of soft_restart. That
>> > sounds like a layering violation to me.
>> >
>> > I assume this is for implementing the spin-table cpu-return-addr idea?
>>
>> Yes.
>>
>> > If so, what's wrong with something like:
>>
>> > void spin_table_cpu_die(unsigned int cpu)
>> > {
>> >     unsigned long release_addr = per_cpu(return_addr, cpu);
>> >
>> >     /*
>> >      * We should have a local_disable(DBG|ASYNC|FIQ|IRQ) function or
>> >      * something similar as these are all context synchronising and
>> >      * therefore expensive.
>> >      */
>> >     local_dbg_disable();
>> >     local_async_disable();
>> >     local_fiq_disable();
>> >     arch_local_irq_disable();
>> >
>> >     soft_restart(release_addr);
>> > }
>>
>> OK, this is a much simpler way than what I was thinking, which
>> was to have the secondaries spin in the kernel until the main
>> cpu shutdown.  I'll switch over to this, thanks.
>
> I just realised that this is still missing the jump to EL2 that I
> mentioned a while back.
>
> I think what we need to do is:
>
> * Have KVM (if present) tears itself down prior to cpu_die, restoring
>   the __hyp_stub_vectors in VBAR_EL2 and disabling the MMU, and caches.
>
> * Add a mechanism to __hyp_stub_vectors to allow a hypercall to
>   call a function at EL2. We should be able to replace the current
>   hyp_stub el1_sync handler with that, and rework KVM to call a function
>   at EL2 to setup VBAR_EL2 appropriately at init time.
>
> * Depending on whether EL2 is available, go via soft_restart or the
>   hypercall to cpu_soft_restart (or something very close to it).
>
> How does that sound?

Hi Mark,

Please ignore my previous mail. I think this version of the sample
implementation is simpler and better. Please give your comments.

###########
diff --git a/arch/arm64/include/asm/proc-fns.h
b/arch/arm64/include/asm/proc-fns.h
index ddbc3f5..40d3360 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -31,7 +31,7 @@ struct cpu_suspend_ctx;
 extern void cpu_cache_off(void);
 extern void cpu_do_idle(void);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
-extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+extern void cpu_reset(unsigned long addr, unsigned long boot_mode)
__attribute__((noreturn));
 extern void cpu_soft_restart(phys_addr_t cpu_reset, unsigned long
addr) __attribute__((noreturn));
 extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
 extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 215ad46..3976737 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -34,6 +34,7 @@
  */
 extern u32 __boot_cpu_mode[2];

+void __hyp_func_call(phys_addr_t func, ...);
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
 phys_addr_t __hyp_get_vectors(void);

diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..82c2b0d 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -53,14 +53,12 @@ ENDPROC(__hyp_stub_vectors)
        .align 11

 el1_sync:
-       mrs     x1, esr_el2
-       lsr     x1, x1, #26
-       cmp     x1, #0x16
+       mrs     x19, esr_el2
+       lsr     x19, x19, #26
+       cmp     x19, #0x16
        b.ne    2f                              // Not an HVC trap
-       cbz     x0, 1f
-       msr     vbar_el2, x0                    // Set vbar_el2
-       b       2f
-1:     mrs     x0, vbar_el2                    // Return vbar_el2
+
+1:     blr     x0                              // Jump to the function
 2:     eret
 ENDPROC(el1_sync)

@@ -101,10 +99,17 @@ ENDPROC(\label)
  */

 ENTRY(__hyp_get_vectors)
-       mov     x0, xzr
-       // fall through
-ENTRY(__hyp_set_vectors)
-       hvc     #0
+       mrs     x0, vbar_el2                    // Return vbar_el2
        ret
 ENDPROC(__hyp_get_vectors)
+
+ENTRY(__hyp_set_vectors)
+       msr     vbar_el2, x1
+       ret
 ENDPROC(__hyp_set_vectors)
+
+/* Call a function @x0 */
+ENTRY(__hyp_func_call)
+       hvc     #0
+       ret
+ENDPROC(__hyp_func_call)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 3cb6dec..e68f42e 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -25,6 +25,7 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/virt.h>

 #include "proc-macros.S"

@@ -69,19 +70,26 @@ ENDPROC(cpu_cache_off)
  */
        .align  5
 ENTRY(cpu_reset)
-       mrs     x1, sctlr_el1
-       bic     x1, x1, #1
-       msr     sctlr_el1, x1                   // disable the MMU
+       mrs     x2, sctlr_el1
+       bic     x2, x2, #1
+       msr     sctlr_el1, x2                   // disable the MMU
        isb
 #if defined(CONFIG_SMP)
 /*     bl      secondary_shutdown */
 #endif
+       sub     w1, w1, #BOOT_CPU_MODE_EL2
+       cbz     w1, __hyp_func_call
        ret     x0
 ENDPROC(cpu_reset)

 ENTRY(cpu_soft_restart)
+       ldr     x3, =__boot_cpu_mode
+       add     x3, x3, #4
+       ldr     w2, [x3]
+
        mov     x19, x0
        mov     x20, x1
+       mov     w21, w2

        /* Turn D-cache off */
        bl      cpu_cache_off
@@ -89,6 +97,7 @@ ENTRY(cpu_soft_restart)
        bl      flush_cache_all

        mov     x0, x20
+       mov     w1, w21
        ret     x19
 ENDPROC(cpu_soft_restart)
#########

I have implemented __hyp_func_call; other users can
make use of it to call __hyp_set_vectors/__hyp_get_vectors.

--Arun

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-21 14:31               ` Mark Rutland
  2014-08-22 11:11                 ` Arun Chandran
@ 2014-08-26 13:00                 ` Arun Chandran
  2014-08-26 14:08                   ` Mark Rutland
  1 sibling, 1 reply; 33+ messages in thread
From: Arun Chandran @ 2014-08-26 13:00 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 21, 2014 at 8:01 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Thu, Aug 21, 2014 at 02:34:46PM +0100, Arun Chandran wrote:
>> Hi Mark,
>>
>> On Wed, Aug 20, 2014 at 7:46 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> > [...]
>> >
>> >> I am trying to kexec a UP-LE kernel from and SMP-LE kernel.
>> >>
>> >> > As I mentioned we do need to ensure that the CPUs are in the mode they
>> >> > started in, though I'm not sure I follow what you mean by "not waiting".
>> >> > This could be an orthogonal issue.
>> >>
>> >> If I verify the secondary CPUs from u-boot I can see that
>> >> they are all looping at
>> >>
>> >>     Core number       : 1
>> >>     Core state        : debug (AArch64 EL2)
>> >>     Debug entry cause : External Debug Request
>> >>     Current PC        : 0x0000000000000238
>> >>     Current CPSR      : 0x200003c9 (EL2h)
>> >>
>> >> But after the kexec calls soft_restar(0) for all secondary CPUs
>> >> they are looping at
>> >>
>> >>     Core number       : 1
>> >>     Core state        : debug (AArch64 EL1)
>> >>     Debug entry cause : External Debug Request
>> >>     Current PC        : 0xffffffc000083200
>> >>     Current CPSR      : 0x600003c5 (EL1h)
>> >>
>> >> This is what I mean by they are not waiting for
>> >> the secondary start-up address to jump.
>> >
>> > Ok.
>> >
>> >> >
>> >> > What exactly do you see, do the CPUs leave the spin-table, are they
>> >> > taking exceptions, are they getting stuck in the spin-table, etc?
>> >> >
>> >> They all are clearly resetting to address "0"(Put a breakpoint and
>> >> verified) but somehow they end up @0xffffffc000083200.
>> >> I still don't know why.
>> >>
>> >> ########
>> >> ffffffc00008319c:       d503201f        nop
>> >>         ...
>> >> ffffffc000083200:       14000260        b       ffffffc000083b80 <el1_sync>
>> >> ffffffc000083204:       d503201f        nop
>> >> ffffffc000083208:       d503201f        nop
>> >> ########
>> >
>> > That's the EL1 exception vector table.
>> >
>> > What looks to be happening is that something causes the CPUs to take an
>> > exception (at EL1). Because translation isn't enabled and the vector
>> > address doesn't map to anything, they'll take some sort of exception.
>> > Because the vectors aren't mapped that will go recursive.
>> >
>> > Your spin-table implementation might be poking something that's not
>> > accessible at EL1, in which case you require the jump to EL2 for
>> > correctness. I can't say for certain either way.
>> >
>>
>> Yes you are right I needed a jump to EL2 for putting the
>> secondary CPUs at correct spinning loop.
>
> Ok. So we need to do what I have suggested elsewhere w.r.t. jumping back
> up to EL2. As you point out below we need to synchronise with the CPUs
> on their way out too we can add a spin-table cpu_kill with a slightly
> dodgy delay to ensure that, given we have no other mechanism for
> synchronising.
>

I was able to remove the delay by pushing "set_cpu_online(cpu, false);"
further down.

############
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3fb64cb..200e49e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -543,8 +543,6 @@ static void ipi_cpu_stop(unsigned int cpu)
                raw_spin_unlock(&stop_lock);
        }

-       set_cpu_online(cpu, false);
-
        local_irq_disable();

        /* If we have the cpu ops use them. */
@@ -554,6 +552,8 @@ static void ipi_cpu_stop(unsigned int cpu)
                && !cpu_ops[cpu]->cpu_disable(cpu))
                cpu_ops[cpu]->cpu_die(cpu);

+
+       set_cpu_online(cpu, false);
        /* Otherwise spin here. */

        while (1)
diff --git a/arch/arm64/kernel/smp_spin_table.c
b/arch/arm64/kernel/smp_spin_table.c
index e7275b3..8dcca88 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -149,6 +149,7 @@ static void smp_spin_table_cpu_die(unsigned int cpu)
        *release_addr = 0;
        __flush_dcache_area(release_addr, 8);

+       set_cpu_online(cpu, false);
        mb();

        soft_restart(0);
##############

This will

a) Make the waiting loop inside smp_send_stop() more meaningful

b) Make sure that at least cpu-release-addr is invalidated.

--Arun


>> I made some progress with the below changes.
>>
>> ###########
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 607d4bb..8969922 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -343,6 +343,11 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )
>>  // Clear EE and E0E on LE systems
>>                       PSR_MODE_EL1h)
>>         msr     spsr_el2, x0
>>         msr     elr_el2, lr
>> +       mov     x0, #0x33ff
>> +       movk    x0, #0x801f, lsl #16
>> +       msr     cptr_el2, x0                    // Enable traps to el2
>> when CPACR or CPACR_EL1 is accessed
>> +       mov     x0, #3 << 20
>> +       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>>         mov     w20, #BOOT_CPU_MODE_EL2         // This CPU booted in EL2
>>         eret
>>  ENDPROC(el2_setup)
>> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
>> index a272f33..d8e806c 100644
>> --- a/arch/arm64/kernel/hyp-stub.S
>> +++ b/arch/arm64/kernel/hyp-stub.S
>> @@ -66,6 +66,14 @@ ENDPROC(el1_sync)
>>
>>  .macro invalid_vector  label
>>  \label:
>> +       mov     x1, #0x33ff
>> +       msr     cptr_el2, x1                    // Disable copro. traps to EL2
>> +
>> +       ic      ialluis
>> +       isb
>> +       dsb     sy
>
> As far as I am aware an "isb; dsb" sequence never makes sense. It should
> be "dsb; isb". You want the I-side maintenance to complete (for which
> you have the DSB) _then_ you want to synchronise the execution context
> with the newly-visible instructions (for which you have the ISB).
>
>> +
>> +       br  x0
>>         b \label
>>  ENDPROC(\label)
>>  .endm
>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index 64733d4..8ca929c 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -65,6 +65,12 @@ void soft_restart(unsigned long addr)
>>  //     smp_secondary_shutdown();
>>  #endif
>>
>> +       /* Delay primary cpu; allow enough time for
>> +        * secondaries to enter spin loop
>> +        */
>> +       if (smp_processor_id() == 0)
>> +               mdelay(1000);
>> +
>>         cpu_soft_restart(virt_to_phys(cpu_reset), addr);
>>
>>         /* Should never get here */
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 3cb6dec..f6ab468 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -76,6 +76,10 @@ ENTRY(cpu_reset)
>>  #if defined(CONFIG_SMP)
>>  /*     bl      secondary_shutdown */
>>  #endif
>> +
>> +       /* Trap to EL2 */
>> +       mov     x1, #3 << 20
>> +       msr     cpacr_el1, x1                   // Enable FP/ASIMD
>>         ret     x0
>>  ENDPROC(cpu_reset)
>>
>> @@ -200,8 +204,6 @@ ENTRY(__cpu_setup)
>>         tlbi    vmalle1is                       // invalidate I + D TLBs
>>         dsb     ish
>>
>> -       mov     x0, #3 << 20
>> -       msr     cpacr_el1, x0                   // Enable FP/ASIMD
>>         msr     mdscr_el1, xzr                  // Reset mdscr_el1
>>         /*
>>          * Memory region attributes for LPAE:
>> #########
>>
>> With the above code I was able to manually kexec
>> an SMP kernel (With the help of debugger).
>>
>> Basically the branch instruction (br x0) is not working
>> in the el2 vector.
>> ------------------
>> CPU#0>h
>>     Core number       : 0
>>     Core state        : debug (AArch64 EL2)
>>     Debug entry cause : External Debug Request
>>     Current PC        : 0x0000004000089800
>
> Does this address look similar to the VBAR_EL2, perhaps?
>
>>     Current CPSR      : 0x600003c9 (EL2h)
>> CPU#0>rd
>> GPR00: 00000043eb891000 0000000000000018 0000000000000006 0000000000000004
>> GPR04: 000000000000001f 000000000000001b 0000000000000000 ffffffffffffffff
>> GPR08: ffffffc3ffe80614 ffffffffffffffff 0000000000000000 0000000000000002
>> GPR12: ffffffc0000968f0 00000040008c0000 00000043eb949002 ffffffffffffffff
>> GPR16: ffffffc0000c1244 0000000000435260 0000007ff9c5d490 00000040000968c0
>> GPR20: 00000043eb891000 ffffffc3eb891000 ffffffc3eb973c00 0000000000000110
>> GPR24: ffffffc000094000 ffffffc000094cd8 000000000000008e ffffffc00082f000
>> GPR28: ffffffc3e9888000 ffffffc3e988bd00 ffffffc000095d88 00000043efb24430
>> CPU#0>go 0x00000043eb891000
>> --------------------
>>
>> I had to use the debugger  to make cpu0 jump to kexec-boot code.
>> Similarly I had to manually put other CPUs to spinning code
>> using debugger. Could you please throw some light here?
>
> I really can't say. I know nothing of your platform or spin-table
> implementation, and this is nothing like what I'd expect the jump to EL2
> code to look like.
>
> Thanks,
> Mark.

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-26 13:00                 ` Arun Chandran
@ 2014-08-26 14:08                   ` Mark Rutland
  0 siblings, 0 replies; 33+ messages in thread
From: Mark Rutland @ 2014-08-26 14:08 UTC (permalink / raw)
  To: linux-arm-kernel


Hi Arun,

Please start a new thread if you have new things to say; this thread has
drifted far from its original purpose (the titular patch is now in the
arm64 devel branch [1]).

[...]

> > Ok. So we need to do what I have suggested elsewhere w.r.t. jumping back
> > up to EL2. As you point out below we need to synchronise with the CPUs
> > on their way out too we can add a spin-table cpu_kill with a slightly
> > dodgy delay to ensure that, given we have no other mechanism for
> > synchronising.
> >
> 
> I was able to remove the delay by pushing "set_cpu_online(cpu, false);"
> further down.
> 
> ############
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 3fb64cb..200e49e 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -543,8 +543,6 @@ static void ipi_cpu_stop(unsigned int cpu)
>                 raw_spin_unlock(&stop_lock);
>         }
> 
> -       set_cpu_online(cpu, false);
> -
>         local_irq_disable();
> 

If we don't set the cpu offline here, we'll call
clear_tasks_mm_cpumask(cpu) and migrate IRQs while the CPU is marked
online, wait a bit, then mark the CPU offline. 

I suspect this is broken, though I could be wrong.

>         /* If we have the cpu ops use them. */
> @@ -554,6 +552,8 @@ static void ipi_cpu_stop(unsigned int cpu)
>                 && !cpu_ops[cpu]->cpu_disable(cpu))
>                 cpu_ops[cpu]->cpu_die(cpu);
> 
> +
> +       set_cpu_online(cpu, false);
>         /* Otherwise spin here. */
> 
>         while (1)
> diff --git a/arch/arm64/kernel/smp_spin_table.c
> b/arch/arm64/kernel/smp_spin_table.c
> index e7275b3..8dcca88 100644
> --- a/arch/arm64/kernel/smp_spin_table.c
> +++ b/arch/arm64/kernel/smp_spin_table.c
> @@ -149,6 +149,7 @@ static void smp_spin_table_cpu_die(unsigned int cpu)
>         *release_addr = 0;
>         __flush_dcache_area(release_addr, 8);
> 
> +       set_cpu_online(cpu, false);
>         mb();
> 
>         soft_restart(0);
> ##############
> 
> This will
> 
> a) Make the waiting loop inside smp_send_stop() more meaningful

I don't follow how this is any more meaningful. We still have no
guarantee that the CPU is actually dead.

> b) Make sure that at least cpu-release-addr is invalidated.

There is still a period between the call to set_cpu_online(cpu, false)
and the CPU jumping to the return-address where it is still in the
kernel, so all this does is shorten the window for the race.

For PSCI 0.2+ we can poll to determine when the CPU is in the firmware.
For PSCI prior to 0.2 we can't, but the window is very short (as the
firmware performs the cache maintenance) and we seem to have gotten away
with it so far.
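
For reference, the PSCI 0.2+ polling amounts to something like the below
in psci.c, where psci_ops, cpu_logical_map() and the PSCI 0.2 defines are
already visible (the name, retry count and return convention are
placeholders):

static int psci_style_cpu_kill(unsigned int cpu)
{
        int i;

        if (!psci_ops.affinity_info)
                return 1;       /* pre-0.2 firmware: nothing we can poll */

        /* Poll until the dying CPU reports as OFF, i.e. back in firmware */
        for (i = 0; i < 10; i++) {
                if (psci_ops.affinity_info(cpu_logical_map(cpu), 0) ==
                    PSCI_0_2_AFFINITY_LEVEL_OFF)
                        return 1;       /* CPU is dead as far as we can tell */
                msleep(10);
        }

        return 0;       /* give up; caller decides how to report this */
}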

For spin-table we might have a large race window because the kernel must
flush the caches at EL2, incurring a relatively large delay. If we are
encountering a race there I'd rather this were fixed with a cpu_kill
callback for spin-table.

Cheers,
Mark.

[1] https://git.kernel.org/cgit/linux/kernel/git/arm64/linux.git/commit/?h=devel&id=6c80fe35fe9edf9147e3db9c8ff1a7761c49c4cc

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-25 14:14         ` Arun Chandran
@ 2014-08-26 15:22           ` Mark Rutland
  2014-08-26 16:14             ` Arun Chandran
  0 siblings, 1 reply; 33+ messages in thread
From: Mark Rutland @ 2014-08-26 15:22 UTC (permalink / raw)
  To: linux-arm-kernel

> Hi Mark,

Hi Arun,

> Please Ignore my previous mail. I think this version of sample
> implementation is simple and better. Please give your comments.

If you wish to have this reviewed in any depth, please:

 - Base your patches on mainline, or at the very least provide a pointer
   to the branch the patch is based upon.

 - Test that this functions with and without CONFIG_KVM.

 - Post this as an RFC PATCH, complete with a rationale (as previously
   discussed).

 - For later revisions, please label the patches with a version and give
   adequate delay between postings to allow for review.

I've given some comments below, but please post proper patches in
future.

> ###########
> diff --git a/arch/arm64/include/asm/proc-fns.h
> b/arch/arm64/include/asm/proc-fns.h
> index ddbc3f5..40d3360 100644
> --- a/arch/arm64/include/asm/proc-fns.h
> +++ b/arch/arm64/include/asm/proc-fns.h
> @@ -31,7 +31,7 @@ struct cpu_suspend_ctx;
>  extern void cpu_cache_off(void);
>  extern void cpu_do_idle(void);
>  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
> -extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
> +extern void cpu_reset(unsigned long addr, unsigned long boot_mode)
> __attribute__((noreturn));
>  extern void cpu_soft_restart(phys_addr_t cpu_reset, unsigned long
> addr) __attribute__((noreturn));
>  extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
>  extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 215ad46..3976737 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -34,6 +34,7 @@
>   */
>  extern u32 __boot_cpu_mode[2];
> 
> +void __hyp_func_call(phys_addr_t func, ...);
>  void __hyp_set_vectors(phys_addr_t phys_vector_base);
>  phys_addr_t __hyp_get_vectors(void);
> 
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index a272f33..82c2b0d 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -53,14 +53,12 @@ ENDPROC(__hyp_stub_vectors)
>         .align 11
> 
>  el1_sync:
> -       mrs     x1, esr_el2
> -       lsr     x1, x1, #26
> -       cmp     x1, #0x16
> +       mrs     x19, esr_el2
> +       lsr     x19, x19, #26
> +       cmp     x19, #0x16
>         b.ne    2f                              // Not an HVC trap
> -       cbz     x0, 1f
> -       msr     vbar_el2, x0                    // Set vbar_el2
> -       b       2f
> -1:     mrs     x0, vbar_el2                    // Return vbar_el2
> +
> +1:     blr     x0                              // Jump to the function

You need to stash the original lr, or we can't return from
__hyp_func_call.

>  2:     eret
>  ENDPROC(el1_sync)
> 
> @@ -101,10 +99,17 @@ ENDPROC(\label)
>   */
> 
>  ENTRY(__hyp_get_vectors)
> -       mov     x0, xzr
> -       // fall through
> -ENTRY(__hyp_set_vectors)
> -       hvc     #0
> +       mrs     x0, vbar_el2                    // Return vbar_el2
>         ret
>  ENDPROC(__hyp_get_vectors)
> +
> +ENTRY(__hyp_set_vectors)
> +       msr     vbar_el2, x1
> +       ret
>  ENDPROC(__hyp_set_vectors)
> +
> +/* Call a function @x0 */
> +ENTRY(__hyp_func_call)
> +       hvc     #0
> +       ret
> +ENDPROC(__hyp_func_call)

These will be called at EL1, so accessing vbar_el2 directly here (instead
of trapping to EL2 via hvc) breaks KVM.

> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 3cb6dec..e68f42e 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -25,6 +25,7 @@
>  #include <asm/hwcap.h>
>  #include <asm/pgtable-hwdef.h>
>  #include <asm/pgtable.h>
> +#include <asm/virt.h>
> 
>  #include "proc-macros.S"
> 
> @@ -69,19 +70,26 @@ ENDPROC(cpu_cache_off)
>   */
>         .align  5
>  ENTRY(cpu_reset)
> -       mrs     x1, sctlr_el1
> -       bic     x1, x1, #1
> -       msr     sctlr_el1, x1                   // disable the MMU
> +       mrs     x2, sctlr_el1
> +       bic     x2, x2, #1
> +       msr     sctlr_el1, x2                   // disable the MMU
>         isb
>  #if defined(CONFIG_SMP)
>  /*     bl      secondary_shutdown */
>  #endif
> +       sub     w1, w1, #BOOT_CPU_MODE_EL2
> +       cbz     w1, __hyp_func_call
>         ret     x0
>  ENDPROC(cpu_reset)

Please base your patches on mainline (or if you need the fixed-up
soft_restart, the arm64 devel branch).

I thought we'd shot down the idea of the secondary_shutdown call.

>  ENTRY(cpu_soft_restart)
> +       ldr     x3, =__boot_cpu_mode
> +       add     x3, x3, #4
> +       ldr     w2, [x3]
> +
>         mov     x19, x0
>         mov     x20, x1
> +       mov     w21, w2
> 
>         /* Turn D-cache off */
>         bl      cpu_cache_off
> @@ -89,6 +97,7 @@ ENTRY(cpu_soft_restart)
>         bl      flush_cache_all
> 
>         mov     x0, x20
> +       mov     w1, w21
>         ret     x19
>  ENDPROC(cpu_soft_restart)
> #########
> 
> I have implemented __hyp_func_call; other users can
> make use of it to call __hyp_set_vectors/__hyp_get_vectors.

The callers of those functions _must_ be updated in this patch to use
the new hypercall mechanism.
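
For example (sketch only; the exact call sites would need auditing, and
the virt_to_phys usage below is illustrative), a caller that currently
does __hyp_set_vectors(phys_vector_base) would become something like:

	/* route the EL2 register write through the stub's hypercall */
	__hyp_func_call(virt_to_phys((void *)__hyp_set_vectors),
			phys_vector_base);

so that the vbar_el2 write still happens at EL2 rather than at EL1.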

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Arm64: convert soft_restart() to assembly code
  2014-08-26 15:22           ` Mark Rutland
@ 2014-08-26 16:14             ` Arun Chandran
  0 siblings, 0 replies; 33+ messages in thread
From: Arun Chandran @ 2014-08-26 16:14 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Mark,

On Tue, Aug 26, 2014 at 8:52 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> Hi Mark,
>
> Hi Arun,
>
>> Please ignore my previous mail. I think this version of the sample
>> implementation is simpler and better. Please give your comments.
>
> If you wish to have this reviewed in any depth, please:
>
>  - Base your patches on mainline, or at the very least provide a pointer
>    to the branch the patch is based upon.
>
>  - Test that this functions with and without CONFIG_KVM.
>
>  - Post this as an RFC PATCH, complete with a rationale (as previously
>    discussed).
>
>  - For later revisions, please label the patches with a version and give
>    adequate delay between postings to allow for review.
>

Yes, I will post an RFC patch soon.

> I've given some comments below, but please post proper patches in
> future.
>

My intention was to do the same (an RFC patch), but I did not want to
drift too far from your idea; that is why I sent it like this.

>> ###########
>> diff --git a/arch/arm64/include/asm/proc-fns.h
>> b/arch/arm64/include/asm/proc-fns.h
>> index ddbc3f5..40d3360 100644
>> --- a/arch/arm64/include/asm/proc-fns.h
>> +++ b/arch/arm64/include/asm/proc-fns.h
>> @@ -31,7 +31,7 @@ struct cpu_suspend_ctx;
>>  extern void cpu_cache_off(void);
>>  extern void cpu_do_idle(void);
>>  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
>> -extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
>> +extern void cpu_reset(unsigned long addr, unsigned long boot_mode)
>> __attribute__((noreturn));
>>  extern void cpu_soft_restart(phys_addr_t cpu_reset, unsigned long
>> addr) __attribute__((noreturn));
>>  extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
>>  extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
>> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
>> index 215ad46..3976737 100644
>> --- a/arch/arm64/include/asm/virt.h
>> +++ b/arch/arm64/include/asm/virt.h
>> @@ -34,6 +34,7 @@
>>   */
>>  extern u32 __boot_cpu_mode[2];
>>
>> +void __hyp_func_call(phys_addr_t func, ...);
>>  void __hyp_set_vectors(phys_addr_t phys_vector_base);
>>  phys_addr_t __hyp_get_vectors(void);
>>
>> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
>> index a272f33..82c2b0d 100644
>> --- a/arch/arm64/kernel/hyp-stub.S
>> +++ b/arch/arm64/kernel/hyp-stub.S
>> @@ -53,14 +53,12 @@ ENDPROC(__hyp_stub_vectors)
>>         .align 11
>>
>>  el1_sync:
>> -       mrs     x1, esr_el2
>> -       lsr     x1, x1, #26
>> -       cmp     x1, #0x16
>> +       mrs     x19, esr_el2
>> +       lsr     x19, x19, #26
>> +       cmp     x19, #0x16
>>         b.ne    2f                              // Not an HVC trap
>> -       cbz     x0, 1f
>> -       msr     vbar_el2, x0                    // Set vbar_el2
>> -       b       2f
>> -1:     mrs     x0, vbar_el2                    // Return vbar_el2
>> +
>> +1:     blr     x0                              // Jump to the function
>
> You need to stash the original lr, or we can't return from
> __hyp_func_call.

Ok.
>
>>  2:     eret
>>  ENDPROC(el1_sync)
>>
>> @@ -101,10 +99,17 @@ ENDPROC(\label)
>>   */
>>
>>  ENTRY(__hyp_get_vectors)
>> -       mov     x0, xzr
>> -       // fall through
>> -ENTRY(__hyp_set_vectors)
>> -       hvc     #0
>> +       mrs     x0, vbar_el2                    // Return vbar_el2
>>         ret
>>  ENDPROC(__hyp_get_vectors)
>> +
>> +ENTRY(__hyp_set_vectors)
>> +       msr     vbar_el2, x1
>> +       ret
>>  ENDPROC(__hyp_set_vectors)
>> +
>> +/* Call a function @x0 */
>> +ENTRY(__hyp_func_call)
>> +       hvc     #0
>> +       ret
>> +ENDPROC(__hyp_func_call)
>
> These will be called at EL1, so accessing vbar_el2 directly here (instead
> of trapping to EL2 via hvc) breaks KVM.
>
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 3cb6dec..e68f42e 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -25,6 +25,7 @@
>>  #include <asm/hwcap.h>
>>  #include <asm/pgtable-hwdef.h>
>>  #include <asm/pgtable.h>
>> +#include <asm/virt.h>
>>
>>  #include "proc-macros.S"
>>
>> @@ -69,19 +70,26 @@ ENDPROC(cpu_cache_off)
>>   */
>>         .align  5
>>  ENTRY(cpu_reset)
>> -       mrs     x1, sctlr_el1
>> -       bic     x1, x1, #1
>> -       msr     sctlr_el1, x1                   // disable the MMU
>> +       mrs     x2, sctlr_el1
>> +       bic     x2, x2, #1
>> +       msr     sctlr_el1, x2                   // disable the MMU
>>         isb
>>  #if defined(CONFIG_SMP)
>>  /*     bl      secondary_shutdown */
>>  #endif
>> +       sub     w1, w1, #BOOT_CPU_MODE_EL2
>> +       cbz     w1, __hyp_func_call
>>         ret     x0
>>  ENDPROC(cpu_reset)
>
> Please base your patches on mainline (or if you need the fixed-up
> soft_restart, the arm64 devel branch).
>
> I thought we'd shot down the idea of the secondary_shutdown call.
>
Yes. Please note that it is commented out.

>>  ENTRY(cpu_soft_restart)
>> +       ldr     x3, =__boot_cpu_mode
>> +       add     x3, x3, #4
>> +       ldr     w2, [x3]
>> +
>>         mov     x19, x0
>>         mov     x20, x1
>> +       mov     w21, w2
>>
>>         /* Turn D-cache off */
>>         bl      cpu_cache_off
>> @@ -89,6 +97,7 @@ ENTRY(cpu_soft_restart)
>>         bl      flush_cache_all
>>
>>         mov     x0, x20
>> +       mov     w1, w21
>>         ret     x19
>>  ENDPROC(cpu_soft_restart)
>> #########
>>
>> I have implemented __hyp_func_call; other users can
>> make use of it to call __hyp_set_vectors/__hyp_get_vectors.
>
> The callers of those functions _must_ be updated in this patch to use
> the new hypercall mechanism.

Ok.

--Arun

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2014-08-26 16:14 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-12 12:42 [PATCH] Arm64: convert soft_restart() to assembly code Arun Chandran
2014-08-12 14:05 ` Mark Rutland
2014-08-13  4:57   ` Arun Chandran
2014-08-13  7:43 ` [PATCH] Arm64: convert part of soft_restart() to assembly Arun Chandran
2014-08-13 10:58   ` Mark Rutland
2014-08-13 11:17     ` Arun Chandran
2014-08-13 11:21       ` Mark Rutland
2014-08-15 17:20 ` [PATCH] Arm64: convert soft_restart() to assembly code Geoff Levand
2014-08-15 18:21   ` Mark Rutland
2014-08-15 18:53     ` Geoff Levand
2014-08-18 16:02       ` Mark Rutland
2014-08-18 17:33         ` Christoffer Dall
2014-08-19  1:10           ` Geoff Levand
2014-08-20 10:48           ` Mark Rutland
2014-08-20 10:54             ` Christoffer Dall
2014-08-20 11:21               ` Mark Rutland
2014-08-25 11:04         ` Arun Chandran
2014-08-25 14:14         ` Arun Chandran
2014-08-26 15:22           ` Mark Rutland
2014-08-26 16:14             ` Arun Chandran
2014-08-18  6:43     ` Arun Chandran
2014-08-19  9:04     ` Arun Chandran
2014-08-20 10:28     ` Arun Chandran
2014-08-20 10:54       ` Mark Rutland
2014-08-20 13:57         ` Arun Chandran
2014-08-20 14:16           ` Mark Rutland
2014-08-21 13:34             ` Arun Chandran
2014-08-21 14:31               ` Mark Rutland
2014-08-22 11:11                 ` Arun Chandran
2014-08-22 13:15                   ` Mark Rutland
2014-08-23 19:50                     ` Arun Chandran
2014-08-26 13:00                 ` Arun Chandran
2014-08-26 14:08                   ` Mark Rutland
