From mboxrd@z Thu Jan  1 00:00:00 1970
From: Huan Wang <alison.wang@nxp.com>
Date: Fri, 20 May 2016 06:53:00 +0000
Subject: [U-Boot] [PATCH v2 1/2] armv8: Support loading 32-bit OS in
 AArch32 execution state
In-Reply-To: <573D804F.9000801@suse.de>
References: <1463646362-15531-1-git-send-email-b18965@freescale.com>
 <1463646362-15531-2-git-send-email-b18965@freescale.com>
 <573D804F.9000801@suse.de>
Message-ID: <AM4PR04MB2145999FC5CE2C3A084D462EF44B0@AM4PR04MB2145.eurprd04.prod.outlook.com>
List-Id: <u-boot.lists.denx.de>
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
To: u-boot@lists.denx.de

> On 05/19/2016 10:26 AM, Alison Wang wrote:
> > To support loading a 32-bit OS, the execution state will change from
> > AArch64 to AArch32 when jumping to kernel.
> >
> > The architecture information will be got through checking FIT image,
> > then U-Boot will load 32-bit OS or 64-bit OS automatically.
> >
> > Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
> > Signed-off-by: Alison Wang <alison.wang@nxp.com>
> > Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
> > ---
> > Changes in v2:
> > - armv8_switch_to_el2_aarch32() is removed. armv8_switch_to_el2_m is
> used
> >    to switch to AArch64 EL2 or AArch32 Hyp.
> > - armv8_switch_to_el1_aarch32() is removed. armv8_switch_to_el1_m is
> used
> >    to switch to AArch64 EL1 or AArch32 SVC.
> >
> >   arch/arm/cpu/armv8/transition.S |  8 ++---
> >   arch/arm/include/asm/macro.h    | 80
> +++++++++++++++++++++++++++++++++++++----
> >   arch/arm/include/asm/system.h   | 25 +++++++++++--
> >   arch/arm/lib/bootm.c            | 26 +++++++++++---
> >   common/image-fit.c              | 12 ++++++-
> >   5 files changed, 133 insertions(+), 18 deletions(-)
> >
> > diff --git a/arch/arm/cpu/armv8/transition.S
> > b/arch/arm/cpu/armv8/transition.S index 253a39b..9fa3b59 100644
> > --- a/arch/arm/cpu/armv8/transition.S
> > +++ b/arch/arm/cpu/armv8/transition.S
> > @@ -11,13 +11,13 @@
> >   #include <asm/macro.h>
> >
> >   ENTRY(armv8_switch_to_el2)
> > -	switch_el x0, 1f, 0f, 0f
> > +	switch_el x4, 1f, 0f, 0f
> >   0:	ret
> > -1:	armv8_switch_to_el2_m x0
> > +1:	armv8_switch_to_el2_m x0, x1, x2, x3, x4, x5, x6
> >   ENDPROC(armv8_switch_to_el2)
> >
> >   ENTRY(armv8_switch_to_el1)
> > -	switch_el x0, 0f, 1f, 0f
> > +	switch_el x4, 0f, 1f, 0f
> >   0:	ret
> > -1:	armv8_switch_to_el1_m x0, x1
> > +1:	armv8_switch_to_el1_m x0, x1, x2, x3, x4, x5, x6
> >   ENDPROC(armv8_switch_to_el1)
> > diff --git a/arch/arm/include/asm/macro.h
> > b/arch/arm/include/asm/macro.h index 9bb0efa..56d77f6 100644
> > --- a/arch/arm/include/asm/macro.h
> > +++ b/arch/arm/include/asm/macro.h
> > @@ -8,6 +8,9 @@
> >
> >   #ifndef __ASM_ARM_MACRO_H__
> >   #define __ASM_ARM_MACRO_H__
> > +
> > +#include <asm/system.h>
> > +
> >   #ifdef __ASSEMBLY__
> >
> >   /*
> > @@ -135,9 +138,20 @@ lr	.req	x30
> >   #endif
> >   .endm
> >
> > -.macro armv8_switch_to_el2_m, xreg1
> > -	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1
> */
> > -	mov	\xreg1, #0x5b1
> > +.macro armv8_switch_to_el2_m, xreg1, xreg2, xreg3, xreg4, xreg5,
> > +xreg6, xreg7
> 
> These arguments should probably get documented :). Also xreg4 doesn't
> seem to actually be a scratch register, but a real parameter. So please
> name it accordingly.
[Alison Wang] Ok, I will add the descriptions for these arguments. Not only
xreg4, xreg1, xreg2 and xreg3 are real parameters too. I will rename them in
the next version.
> 
> > +	mov	\xreg5, \xreg1
> > +	mov	\xreg6, \xreg2
> > +	mov	\xreg7, \xreg3
> > +
> > +	/*
> > +	 * If the next lower exception level is AArch64, 64bit EL2 | HCE |
> SMD |
> > +	 * RES1 (Bits[5:4]) | Non-secure EL0/EL1. If the next lower
> exception
> > +	 * level is AArch32, 32bit EL2 | HCE | SMD | RES1 (Bits[5:4]) |
> > +	 * Non-secure EL0/EL1.
> > +	 */
> > +	mov	\xreg1, #0x1b1
> > +	lsl	\xreg2, \xreg4, #10
> > +	orr	\xreg1, \xreg1, \xreg2
> 
> Is there any way you can make this obvious from the code? Basically we
> would want something like
> 
> mov \xreg1, #(SCR_EL3_EL2_AA64 | SCR_EL3_HCE | SCR_EL3_SMD ...)
> 
> I'm sure there are creative ways to achieve this even with the high bits
> set. Maybe ldr = works?
[Alison Wang] Yes, I will change them and make them obvious.
> 
> >   	msr	scr_el3, \xreg1
> >   	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3
> */
> >   	mov	\xreg1, #0x33ff
> > @@ -156,18 +170,46 @@ lr	.req	x30
> >   	movk	\xreg1, #0x30C5, lsl #16
> >   	msr	sctlr_el2, \xreg1
> >
> > -	/* Return to the EL2_SP2 mode from EL3 */
> >   	mov	\xreg1, sp
> >   	msr	sp_el2, \xreg1		/* Migrate SP */
> >   	mrs	\xreg1, vbar_el3
> >   	msr	vbar_el2, \xreg1	/* Migrate VBAR */
> > +
> > +	/* Check switch to AArch64 EL2 or AArch32 Hypervisor mode */
> > +	ldr	\xreg1, =ES_TO_AARCH64
> > +	cmp	\xreg4, \xreg1
> > +	b.eq	1f
> > +	b.ne	2f
> 
> Just b.ne should be enough, no?
[Alison Wang] Yes, you are right.
> 
> > +
> > +1:
> > +	/* Return to the EL2_SP2 mode from EL3 */
> >   	mov	\xreg1, #0x3c9
> 
> Magic values again :). Please convert them to constants getting ORed.
[Alison Wang] Ok, I will change the original magic values too.
> 
> >   	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
> >   	msr	elr_el3, lr
> > +
> > +	mov	\xreg1, \xreg5
> > +	mov	\xreg2, \xreg6
> > +	mov	\xreg3, \xreg7
> > +	eret
> > +
> > +2:
> > +	/* Return to AArch32 Hypervisor mode */
> > +	mov     \xreg1, #0x1da
> > +	msr	spsr_el3, \xreg1	/* Hyp | D | A | I | F */
> > +	msr     elr_el3, \xreg5
> > +
> > +	mov	\xreg1, #0
> > +	mov	\xreg2, \xreg6
> > +	mov	\xreg3, \xreg7
> >   	eret
> > +
> >   .endm
> >
> > -.macro armv8_switch_to_el1_m, xreg1, xreg2
> > +.macro armv8_switch_to_el1_m, xreg1, xreg2, xreg3, xreg4, xreg5,
> xreg6, xreg7
> > +	mov	\xreg5, \xreg1
> > +	mov	\xreg6, \xreg2
> > +	mov	\xreg7, \xreg3
> > +
> >   	/* Initialize Generic Timers */
> >   	mrs	\xreg1, cnthctl_el2
> >   	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to timers */
> > @@ -188,7 +230,7 @@ lr	.req	x30
> >   	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
> >
> >   	/* Initialize HCR_EL2 */
> > -	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
> > +	lsl	\xreg1, \xreg4, #31
> 
> This deserves a comment. Imagine you read this code again in 5 years.
> Would you still understand what's going on?
[Alison Wang] Ok, I will add a comment.
> 
> >   	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
> >   	msr	hcr_el2, \xreg1
> >
> > @@ -203,15 +245,39 @@ lr	.req	x30
> >   	movk	\xreg1, #0x30d0, lsl #16
> >   	msr	sctlr_el1, \xreg1
> >
> > -	/* Return to the EL1_SP1 mode from EL2 */
> >   	mov	\xreg1, sp
> >   	msr	sp_el1, \xreg1		/* Migrate SP */
> >   	mrs	\xreg1, vbar_el2
> >   	msr	vbar_el1, \xreg1	/* Migrate VBAR */
> > +
> > +	/* Check switch to AArch64 EL1 or AArch32 Supervisor mode */
> > +	ldr	\xreg1, =ES_TO_AARCH64
> > +	cmp	\xreg4, \xreg1
> > +	b.eq	1f
> > +	b.ne	2f
> > +
> > +1:
> > +	/* Return to the EL1_SP1 mode from EL2 */
> >   	mov	\xreg1, #0x3c5
> >   	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
> >   	msr	elr_el2, lr
> > +
> > +	mov	\xreg1, \xreg5
> > +	mov	\xreg2, \xreg6
> > +	mov	\xreg3, \xreg7
> > +	eret
> > +
> > +2:
> > +	/* Return to the Supervisor mode from EL2 */
> > +	mov     \xreg1, #0x1d3
> > +	msr     spsr_el2, \xreg1        /* Supervisor | D | A | I | F */
> > +	msr     elr_el2, \xreg5
> > +
> > +	mov	\xreg1, #0
> > +	mov	\xreg2, \xreg6
> > +	mov	\xreg3, \xreg7
> >   	eret
> > +
> >   .endm
> >
> >   #if defined(CONFIG_GICV3)
> > diff --git a/arch/arm/include/asm/system.h
> > b/arch/arm/include/asm/system.h index 9ae890a..fcfe3ae 100644
> > --- a/arch/arm/include/asm/system.h
> > +++ b/arch/arm/include/asm/system.h
> > @@ -17,6 +17,9 @@
> >   #define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
> >   #define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
> >
> > +#define ES_TO_AARCH64			1
> > +#define ES_TO_AARCH32			0
> > +
> >   #ifndef __ASSEMBLY__
> >
> >   u64 get_page_table_size(void);
> > @@ -100,8 +103,26 @@ void __asm_invalidate_icache_all(void);
> >   int __asm_flush_l3_cache(void);
> >   void __asm_switch_ttbr(u64 new_ttbr);
> >
> > -void armv8_switch_to_el2(void);
> > -void armv8_switch_to_el1(void);
> > +/*
> > + * Switch from EL3 to EL2 for ARMv8
> > + *
> > + * @entry_point: kernel entry point
> > + * @mach_nr:     machine nr
> > + * @fdt_addr:    fdt address
> > + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
> > + */
> > +void armv8_switch_to_el2(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> > +			 u64 es_flag);
> > +/*
> > + * Switch from EL2 to EL1 for ARMv8
> > + *
> > + * @entry_point: kernel entry point
> > + * @mach_nr:     machine nr
> > + * @fdt_addr:    fdt address
> > + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
> > + */
> > +void armv8_switch_to_el1(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> > +			 u64 es_flag);
> >   void gic_init(void);
> >   void gic_send_sgi(unsigned long sgino);
> >   void wait_for_wakeup(void);
> > diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c index
> > 0838d89..f6f685a 100644
> > --- a/arch/arm/lib/bootm.c
> > +++ b/arch/arm/lib/bootm.c
> > @@ -193,9 +193,9 @@ static void do_nonsec_virt_switch(void)
> >   {
> >   	smp_kick_all_cpus();
> >   	dcache_disable();	/* flush cache before swtiching to EL2 */
> > -	armv8_switch_to_el2();
> > +	armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
> 
> Just rename the existing function to armv8_switch_to_el2_adv() or so and
> provide armv8_switch_to_el2() as static inline function that simply
> invokes armv8_switch_to_el2_adv(0, 0, 0, ES_TO_AARCH64);
[Alison Wang] I didn't know why you suggest to do so. Please explain it.
Thanks.
> 
> >   #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> > -	armv8_switch_to_el1();
> > +	armv8_switch_to_el1(0, 0, 0, ES_TO_AARCH64);
> >   #endif
> >   }
> >   #endif
> > @@ -286,8 +286,26 @@ static void boot_jump_linux(bootm_headers_t
> *images, int flag)
> >   	announce_and_cleanup(fake);
> >
> >   	if (!fake) {
> > -		do_nonsec_virt_switch();
> > -		kernel_entry(images->ft_addr, NULL, NULL, NULL);
> > +		if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
> > +		    (images->os.arch == IH_ARCH_ARM)) {
> > +			smp_kick_all_cpus();
> > +			dcache_disable();
> 
> This open codes part of do_nonsec_virt_switch().
> 
> Could we just clean up all of that mess a bit more maybe? How about you
> call
> 
> do_nonsec_virt_switch(ES_EL2 | ES_AARCH32, entry, images->ft_addr) or
> do_nonsec_virt_switch(ES_EL1 | ES_AARCH64, entry, images->ft_addr)
> 
> Then the if() would only need to set ES_AARCH32/64 in a variable and
> everything else goes along the same code.
[Alison Wang] It sounds fine. I will try to do it in the next version.
> 
> > +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> > +			armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
> > +			armv8_switch_to_el1((u64)images->ep,
> > +					    (u64)gd->bd->bi_arch_number,
> > +					    (u64)images->ft_addr,
> > +					    ES_TO_AARCH32);
> > +#else
> > +			armv8_switch_to_el2((u64)images->ep,
> > +					    (u64)gd->bd->bi_arch_number,
> > +					    (u64)images->ft_addr,
> > +					    ES_TO_AARCH32);
> > +#endif
> > +		} else {
> > +			do_nonsec_virt_switch();
> > +			kernel_entry(images->ft_addr, NULL, NULL, NULL);
> > +		}
> >   	}
> >   #else
> >   	unsigned long machid = gd->bd->bi_arch_number; diff --git
> > a/common/image-fit.c b/common/image-fit.c index 25f8a11..2986469
> > 100644
> > --- a/common/image-fit.c
> > +++ b/common/image-fit.c
> > @@ -1163,7 +1163,8 @@ int fit_image_check_arch(const void *fit, int
> noffset, uint8_t arch)
> >   	if (fit_image_get_arch(fit, noffset, &image_arch))
> >   		return 0;
> >   	return (arch == image_arch) ||
> > -		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
> > +		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64) ||
> > +		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);
> 
> This should be a Kconfig option. ThunderX doesn't support AArch32
> execution.
[Alison Wang] I can't understand your meaning. Can you clarify it?
> 
> >   }
> >
> >   /**
> > @@ -1586,6 +1587,9 @@ int fit_image_load(bootm_headers_t *images,
> ulong addr,
> >   	int type_ok, os_ok;
> >   	ulong load, data, len;
> >   	uint8_t os;
> > +#ifndef USE_HOSTCC
> > +	uint8_t os_arch;
> > +#endif
> >   	const char *prop_name;
> >   	int ret;
> >
> > @@ -1669,6 +1673,12 @@ int fit_image_load(bootm_headers_t *images,
> ulong addr,
> >   		return -ENOEXEC;
> >   	}
> >   #endif
> > +
> > +#ifndef USE_HOSTCC
> > +	fit_image_get_arch(fit, noffset, &os_arch);
> > +	images->os.arch = os_arch;
> > +#endif
> > +
> >   	if (image_type == IH_TYPE_FLATDT &&
> >   	    !fit_image_check_comp(fit, noffset, IH_COMP_NONE)) {
> >   		puts("FDT image is compressed");

Thanks a lot for your review.


Best Regards,
Alison Wang