From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id EE296C25B0E for ; Fri, 12 Aug 2022 14:34:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S238530AbiHLOe3 (ORCPT ); Fri, 12 Aug 2022 10:34:29 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54822 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S236950AbiHLOe2 (ORCPT ); Fri, 12 Aug 2022 10:34:28 -0400 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id B5C61AB07B for ; Fri, 12 Aug 2022 07:34:26 -0700 (PDT) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 354F6106F; Fri, 12 Aug 2022 07:34:27 -0700 (PDT) Received: from [192.168.12.23] (unknown [172.31.20.19]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id DB0663F5A1; Fri, 12 Aug 2022 07:34:24 -0700 (PDT) Message-ID: <8af01455-f228-676c-9cc7-d6933219dd3d@arm.com> Date: Fri, 12 Aug 2022 15:34:15 +0100 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:91.0) Gecko/20100101 Thunderbird/91.12.0 Subject: Re: [kvm-unit-tests PATCH v3 19/27] arm/arm64: Add a setup sequence for systems that boot through EFI Content-Language: en-GB To: Alexandru Elisei Cc: kvm@vger.kernel.org, andrew.jones@linux.dev, drjones@redhat.com, pbonzini@redhat.com, jade.alglave@arm.com, ricarkol@google.com References: <20220630100324.3153655-1-nikos.nikoleris@arm.com> <20220630100324.3153655-20-nikos.nikoleris@arm.com> From: Nikos Nikoleris In-Reply-To: Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org On 19/07/2022 15:08, Alexandru Elisei wrote: > Hi, > > On Thu, Jun 30, 2022 at 11:03:16AM +0100, Nikos Nikoleris wrote: >> This change implements an alternative setup sequence for the system >> when we are booting through EFI. The memory map is discovered through >> EFI boot services and devices through ACPI. >> >> This change is based on a change initially proposed by >> Andrew Jones >> >> Signed-off-by: Nikos Nikoleris >> --- >> lib/linux/efi.h | 1 + >> lib/arm/asm/setup.h | 2 + >> lib/arm/setup.c | 181 +++++++++++++++++++++++++++++++++++++++++++- >> arm/cstart.S | 1 + >> arm/cstart64.S | 1 + >> 5 files changed, 184 insertions(+), 2 deletions(-) >> >> diff --git a/lib/linux/efi.h b/lib/linux/efi.h >> index 53748dd..89f9a9e 100644 >> --- a/lib/linux/efi.h >> +++ b/lib/linux/efi.h >> @@ -63,6 +63,7 @@ typedef guid_t efi_guid_t; >> (c) & 0xff, ((c) >> 8) & 0xff, d } } >> >> #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) >> +#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) >> >> #define LOADED_IMAGE_PROTOCOL_GUID EFI_GUID(0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) >> >> diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h >> index 64cd379..c4cd485 100644 >> --- a/lib/arm/asm/setup.h >> +++ b/lib/arm/asm/setup.h >> @@ -6,6 +6,7 @@ >> * This work is licensed under the terms of the GNU LGPL, version 2. >> */ >> #include >> +#include >> #include >> #include >> >> @@ -37,5 +38,6 @@ extern unsigned int mem_region_get_flags(phys_addr_t paddr); >> #define SMP_CACHE_BYTES L1_CACHE_BYTES >> >> void setup(const void *fdt, phys_addr_t freemem_start); >> +efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo); >> >> #endif /* _ASMARM_SETUP_H_ */ >> diff --git a/lib/arm/setup.c b/lib/arm/setup.c >> index 13513d0..30d04d0 100644 >> --- a/lib/arm/setup.c >> +++ b/lib/arm/setup.c >> @@ -34,7 +34,7 @@ >> #define NR_EXTRA_MEM_REGIONS 16 >> #define NR_INITIAL_MEM_REGIONS (MAX_DT_MEM_REGIONS + NR_EXTRA_MEM_REGIONS) >> >> -extern unsigned long _etext; >> +extern unsigned long _text, _etext, _data, _edata; >> >> char *initrd; >> u32 initrd_size; >> @@ -44,7 +44,10 @@ int nr_cpus; >> >> static struct mem_region __initial_mem_regions[NR_INITIAL_MEM_REGIONS + 1]; >> struct mem_region *mem_regions = __initial_mem_regions; >> -phys_addr_t __phys_offset, __phys_end; >> +phys_addr_t __phys_offset = (phys_addr_t)-1, __phys_end = 0; >> + >> +extern void exceptions_init(void); >> +extern void asm_mmu_disable(void); >> >> int mpidr_to_cpu(uint64_t mpidr) >> { >> @@ -272,3 +275,177 @@ void setup(const void *fdt, phys_addr_t freemem_start) >> if (!(auxinfo.flags & AUXINFO_MMU_OFF)) >> setup_vm(); >> } >> + >> +#ifdef CONFIG_EFI >> + >> +#include >> + >> +static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo) >> +{ >> + efi_status_t status; >> + struct rsdp_descriptor *rsdp; >> + >> + /* >> + * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used >> + * by kvm-unit-tests arm64 memory allocator. So it is not necessary to >> + * copy the data structure to another memory region to prevent >> + * unintentional overwrite. >> + */ >> + status = efi_get_system_config_table(ACPI_20_TABLE_GUID, (void **)&rsdp); >> + if (status != EFI_SUCCESS) >> + return status; >> + >> + set_efi_rsdp(rsdp); >> + >> + return EFI_SUCCESS; >> +} >> + >> +static efi_status_t efi_mem_init(efi_bootinfo_t *efi_bootinfo) >> +{ >> + int i; >> + unsigned long free_mem_pages = 0; >> + unsigned long free_mem_start = 0; >> + struct efi_boot_memmap *map = &(efi_bootinfo->mem_map); >> + efi_memory_desc_t *buffer = *map->map; >> + efi_memory_desc_t *d = NULL; >> + phys_addr_t base, top; >> + struct mem_region *r; >> + uintptr_t text = (uintptr_t)&_text, etext = __ALIGN((uintptr_t)&_etext, 4096); >> + uintptr_t data = (uintptr_t)&_data, edata = __ALIGN((uintptr_t)&_edata, 4096); >> + >> + /* >> + * Record the largest free EFI_CONVENTIONAL_MEMORY region >> + * which will be used to set up the memory allocator, so that >> + * the memory allocator can work in the largest free >> + * continuous memory region. >> + */ >> + for (i = 0, r = &mem_regions[0]; i < *(map->map_size); i += *(map->desc_size), ++r) { >> + d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]); >> + >> + r->start = d->phys_addr; >> + r->end = d->phys_addr + d->num_pages * EFI_PAGE_SIZE; >> + >> + switch (d->type) { >> + case EFI_RESERVED_TYPE: >> + case EFI_LOADER_DATA: >> + case EFI_BOOT_SERVICES_CODE: >> + case EFI_BOOT_SERVICES_DATA: >> + case EFI_RUNTIME_SERVICES_CODE: >> + case EFI_RUNTIME_SERVICES_DATA: >> + case EFI_UNUSABLE_MEMORY: >> + case EFI_ACPI_RECLAIM_MEMORY: >> + case EFI_ACPI_MEMORY_NVS: >> + case EFI_PAL_CODE: >> + r->flags = MR_F_RESERVED; >> + break; >> + case EFI_MEMORY_MAPPED_IO: >> + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: >> + r->flags = MR_F_IO; >> + break; >> + case EFI_LOADER_CODE: >> + if (r->start <= text && r->end > text) { >> + /* This is the unit test region. Flag the code separately. */ >> + phys_addr_t tmp = r->end; >> + >> + assert(etext <= data); >> + assert(edata <= r->end); >> + r->flags = MR_F_CODE; >> + r->end = data; >> + ++r; >> + r->start = data; >> + r->end = tmp; >> + } else { >> + r->flags = MR_F_RESERVED; >> + } >> + break; >> + case EFI_CONVENTIONAL_MEMORY: >> + if (free_mem_pages < d->num_pages) { >> + free_mem_pages = d->num_pages; >> + free_mem_start = d->phys_addr; >> + } >> + break; >> + } >> + >> + if (!(r->flags & MR_F_IO)) { >> + if (r->start < __phys_offset) >> + __phys_offset = r->start; >> + if (r->end > __phys_end) >> + __phys_end = r->end; > > What happens if there are holes between __phys_offset and __phys_end? The > boot path for KVM makes sure there are no holes. Wouldn't asm_mmu_disable() > trigger a translation fault if the address is not mapped because it > corresponds to a hole in the EFI provided memory map? > > What happens if the region [__phys_offset, __phys_end) contains one of the > EFI reserved memory types? That's not really something that kvm-unit-tests > should be poking. > That's a good point. > The efi boot code path changes the semantics for __phys_offset and > __phys_end, and that's a recipe for introducing bugs. > > I would suggest changing __phys_offset and __phys_end to represent > something that applies to both the KVM boot path and the EFI boot path. > Currently we're using __phys_offset and __phys_end in two places: * When we disable the MMU, to clean and invalidate the whole physical address space. * In selftest-mem, To compute the amount of memory available to the test. > One idea that occured to me would be to have separate text, data and > available memory regions. Have __phys_offset and __phys_end express the > start and end of the largest contiguous memory region, and initialize the > memory allocator from this region. That will also pave the way for handling > multiple memory regions from the DT. For EFI, this didn't work. __phys_end - __phys_offset doesn't produce the amount of memory expected by selftest-mem. > > Or, if you can prove and EFI_LOADER_CODE is always adjacent to > EFI_CONVENTIONAL_MEMORY, you can have __phys_offset and __phys_end describe > the region from the start of text to the end of EFI_CONVENTIONAL_MEMORY. > I don't think we can rely on this assumption. It doesn't hold on when I run experiments locally. > Thoughts? Suggestions? Could we get rid of __phys_offset and __phys_end? You wanted to move away from cleaning the whole memory for the purposes of disabling the MMU. We could just clean the regions as we discover them and keep a __phys_size for the purposes of selftest-mem. What do you think? Thanks, Nikos > > Thanks, > Alex > >> + } >> + } >> + __phys_end &= PHYS_MASK; >> + asm_mmu_disable(); >> + >> + if (free_mem_pages == 0) >> + return EFI_OUT_OF_RESOURCES; >> + >> + assert(sizeof(long) == 8 || free_mem_start < (3ul << 30)); >> + >> + phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT); >> + phys_alloc_set_minimum_alignment(SMP_CACHE_BYTES); >> + >> + phys_alloc_get_unused(&base, &top); >> + base = PAGE_ALIGN(base); >> + top = top & PAGE_MASK; >> + assert(sizeof(long) == 8 || !(base >> 32)); >> + if (sizeof(long) != 8 && (top >> 32) != 0) >> + top = ((uint64_t)1 << 32); >> + page_alloc_init_area(0, base >> PAGE_SHIFT, top >> PAGE_SHIFT); >> + page_alloc_ops_enable(); >> + >> + return EFI_SUCCESS; >> +} >> + >> +efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo) >> +{ >> + efi_status_t status; >> + >> + struct thread_info *ti = current_thread_info(); >> + >> + memset(ti, 0, sizeof(*ti)); >> + >> + exceptions_init(); >> + >> + status = efi_mem_init(efi_bootinfo); >> + if (status != EFI_SUCCESS) { >> + printf("Failed to initialize memory: "); >> + switch (status) { >> + case EFI_OUT_OF_RESOURCES: >> + printf("No free memory region\n"); >> + break; >> + default: >> + printf("Unknown error\n"); >> + break; >> + } >> + return status; >> + } >> + >> + status = setup_rsdp(efi_bootinfo); >> + if (status != EFI_SUCCESS) { >> + printf("Cannot find RSDP in EFI system table\n"); >> + return status; >> + } >> + >> + psci_set_conduit(); >> + cpu_init(); >> + /* cpu_init must be called before thread_info_init */ >> + thread_info_init(current_thread_info(), 0); >> + /* mem_init must be called before io_init */ >> + io_init(); >> + >> + timer_save_state(); >> + if (initrd) { >> + /* environ is currently the only file in the initrd */ >> + char *env = malloc(initrd_size); >> + >> + memcpy(env, initrd, initrd_size); >> + setup_env(env, initrd_size); >> + } >> + >> + if (!(auxinfo.flags & AUXINFO_MMU_OFF)) >> + setup_vm(); >> + >> + return EFI_SUCCESS; >> +} >> + >> +#endif >> diff --git a/arm/cstart.S b/arm/cstart.S >> index dc324c5..66a55b9 100644 >> --- a/arm/cstart.S >> +++ b/arm/cstart.S >> @@ -256,6 +256,7 @@ asm_mmu_disable: >> * >> * Input r0 is the stack top, which is the exception stacks base >> */ >> +.globl exceptions_init >> exceptions_init: >> mrc p15, 0, r2, c1, c0, 0 @ read SCTLR >> bic r2, #CR_V @ SCTLR.V := 0 >> diff --git a/arm/cstart64.S b/arm/cstart64.S >> index 390feb9..55b41ea 100644 >> --- a/arm/cstart64.S >> +++ b/arm/cstart64.S >> @@ -276,6 +276,7 @@ asm_mmu_disable: >> * Vectors >> */ >> >> +.globl exceptions_init >> exceptions_init: >> adrp x4, vector_table >> add x4, x4, :lo12:vector_table >> -- >> 2.25.1 >>