[-- Attachment #1: Type: text/plain, Size: 1590 bytes --] This adds support for the EFI System Resource Table. This involves reserving the table in Xen and adding a new hypercall so that dom0 can access it. Changes since v2: - Use the esrt_desc global variable instead of passing it as a function parameter - Add an overflow check for the ESRT size - Create a new memory region for the ESRT to avoid wasting memory - Add hypercall to retrieve the ESRT - Add file local variables used during development - Remove extra consts - Follow the EFI naming convention in struct definitions - Move struct definitions to header file - Fix inverted logic in overflow check - Remove BUILD_BUG_ON()s - Avoid overriding attribute of memory descriptor containing ESRT Changes since v1: - Remove the esrt_status enum - Use EFI types - Fix style nits - Remove an unused overflow check Demi Marie Obenour (4): Grab the EFI System Resource Table and check it Add a dedicated memory region for the ESRT Add a new hypercall to get the ESRT Add emacs file-local variables xen/arch/arm/efi/efi-boot.h | 1 + xen/arch/x86/efi/efi-boot.h | 67 +++++++++++++++++++++++++-------- xen/arch/x86/include/asm/e820.h | 2 +- xen/common/efi/boot.c | 65 ++++++++++++++++++++++++++++++-- xen/common/efi/efi.h | 20 ++++++++++ xen/common/efi/runtime.c | 27 ++++++++++++- xen/include/efi/efiapi.h | 3 ++ xen/include/public/platform.h | 7 ++++ 8 files changed, 172 insertions(+), 20 deletions(-) -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
[-- Attachment #1: Type: text/plain, Size: 8494 bytes --] The EFI System Resource Table (ESRT) is necessary for fwupd to identify firmware updates to install. According to the UEFI specification §23.4, the table shall be stored in memory of type EfiBootServicesData. Therefore, Xen must avoid reusing that memory for other purposes, so that Linux can access the ESRT. Additionally, Xen must mark the memory as reserved, so that Linux knows accessing it is safe. See https://lore.kernel.org/xen-devel/20200818184018.GN1679@mail-itl/T/ for details. Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com> --- xen/arch/arm/efi/efi-boot.h | 1 + xen/arch/x86/efi/efi-boot.h | 2 +- xen/common/efi/boot.c | 50 ++++++++++++++++++++++++++++++++++--- xen/common/efi/efi.h | 18 +++++++++++++ xen/common/efi/runtime.c | 3 ++- xen/include/efi/efiapi.h | 3 +++ 6 files changed, 72 insertions(+), 5 deletions(-) diff --git a/xen/arch/arm/efi/efi-boot.h b/xen/arch/arm/efi/efi-boot.h index e452b687d8..ab2ad3dfe0 100644 --- a/xen/arch/arm/efi/efi-boot.h +++ b/xen/arch/arm/efi/efi-boot.h @@ -188,6 +188,7 @@ static EFI_STATUS __init efi_process_memory_map_bootinfo(EFI_MEMORY_DESCRIPTOR * desc_ptr->Type == EfiLoaderCode || desc_ptr->Type == EfiLoaderData || (!map_bs && + desc_ptr != esrt_desc && (desc_ptr->Type == EfiBootServicesCode || desc_ptr->Type == EfiBootServicesData))) ) { diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h index 6e65b569b0..75937c8a11 100644 --- a/xen/arch/x86/efi/efi-boot.h +++ b/xen/arch/x86/efi/efi-boot.h @@ -171,7 +171,7 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable, { case EfiBootServicesCode: case EfiBootServicesData: - if ( map_bs ) + if ( map_bs || desc == (EFI_MEMORY_DESCRIPTOR *)esrt_desc ) { default: type = E820_RESERVED; diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c index ac1b235372..31664818c1 100644 --- a/xen/common/efi/boot.c +++ b/xen/common/efi/boot.c @@ -567,6 +567,38 @@ static int 
__init efi_check_dt_boot(const EFI_LOADED_IMAGE *loaded_image) } #endif +static UINTN __initdata esrt = EFI_INVALID_TABLE_ADDR; + +static bool __init is_esrt_valid( + const EFI_MEMORY_DESCRIPTOR *const desc) +{ + size_t available_len, len; + const UINTN physical_start = desc->PhysicalStart; + const ESRT *esrt_ptr; + + len = desc->NumberOfPages << EFI_PAGE_SHIFT; + if ( esrt == EFI_INVALID_TABLE_ADDR ) + return false; + if ( physical_start > esrt || esrt - physical_start >= len ) + return false; + /* + * The specification requires EfiBootServicesData, but accept + * EfiRuntimeServicesData for compatibility + */ + if ( (desc->Type != EfiRuntimeServicesData) && + (desc->Type != EfiBootServicesData) ) + return false; + available_len = len - (esrt - physical_start); + if ( available_len < sizeof(*esrt_ptr) ) + return false; + esrt_ptr = (const ESRT *)esrt; + if ( esrt_ptr->Version != 1 || !esrt_ptr->Count ) + return false; + return esrt_ptr->Count <= + (available_len - sizeof(*esrt_ptr)) / + sizeof(esrt_ptr->Entries[0]); +} + /* * Include architecture specific implementation here, which references the * static globals defined above. 
@@ -857,6 +889,7 @@ static void __init efi_tables(void) static EFI_GUID __initdata mps_guid = MPS_TABLE_GUID; static EFI_GUID __initdata smbios_guid = SMBIOS_TABLE_GUID; static EFI_GUID __initdata smbios3_guid = SMBIOS3_TABLE_GUID; + static EFI_GUID __initdata esrt_guid = ESRT_GUID; if ( match_guid(&acpi2_guid, &efi_ct[i].VendorGuid) ) efi.acpi20 = (unsigned long)efi_ct[i].VendorTable; @@ -868,6 +901,8 @@ static void __init efi_tables(void) efi.smbios = (unsigned long)efi_ct[i].VendorTable; if ( match_guid(&smbios3_guid, &efi_ct[i].VendorGuid) ) efi.smbios3 = (unsigned long)efi_ct[i].VendorTable; + if ( match_guid(&esrt_guid, &efi_ct[i].VendorGuid) ) + esrt = (UINTN)efi_ct[i].VendorTable; } #ifndef CONFIG_ARM /* TODO - disabled until implemented on ARM */ @@ -1056,19 +1091,19 @@ static void __init efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *Syste EFI_STATUS status; UINTN info_size = 0, map_key; bool retry; -#ifdef CONFIG_EFI_SET_VIRTUAL_ADDRESS_MAP unsigned int i; -#endif efi_bs->GetMemoryMap(&info_size, NULL, &map_key, &efi_mdesc_size, &mdesc_ver); - info_size += 8 * efi_mdesc_size; + info_size += 8 * (efi_mdesc_size + 1); efi_memmap = efi_arch_allocate_mmap_buffer(info_size); if ( !efi_memmap ) blexit(L"Unable to allocate memory for EFI memory map"); for ( retry = false; ; retry = true ) { + esrt_desc = (const EFI_MEMORY_DESCRIPTOR *)EFI_INVALID_TABLE_ADDR; + efi_memmap_size = info_size; status = SystemTable->BootServices->GetMemoryMap(&efi_memmap_size, efi_memmap, &map_key, @@ -1077,6 +1112,15 @@ static void __init efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *Syste if ( EFI_ERROR(status) ) PrintErrMesg(L"Cannot obtain memory map", status); + for ( i = 0; i < efi_memmap_size; i += efi_mdesc_size ) + { + if ( is_esrt_valid(efi_memmap + i) ) + { + esrt_desc = efi_memmap + i; + break; + } + } + efi_arch_process_memory_map(SystemTable, efi_memmap, efi_memmap_size, efi_mdesc_size, mdesc_ver); diff --git a/xen/common/efi/efi.h 
b/xen/common/efi/efi.h index c9aa65d506..02f499071a 100644 --- a/xen/common/efi/efi.h +++ b/xen/common/efi/efi.h @@ -10,6 +10,23 @@ #include <xen/spinlock.h> #include <asm/page.h> +typedef struct _ESRT_ENTRY { + EFI_GUID FwClass; + UINT32 FwType; + UINT32 FwVersion; + UINT32 FwLowestSupportedVersion; + UINT32 FwCapsuleFlags; + UINT32 FwLastAttemptVersion; + UINT32 FwLastAttemptStatus; +} ESRT_ENTRY; + +typedef struct _ESRT { + UINT32 Count; + UINT32 Max; + UINT64 Version; + ESRT_ENTRY Entries[]; +} ESRT; + struct efi_pci_rom { const struct efi_pci_rom *next; u16 vendor, devid, segment; @@ -28,6 +45,7 @@ extern const EFI_RUNTIME_SERVICES *efi_rs; extern UINTN efi_memmap_size, efi_mdesc_size; extern void *efi_memmap; +extern const EFI_MEMORY_DESCRIPTOR *esrt_desc; #ifdef CONFIG_X86 extern mfn_t efi_l4_mfn; diff --git a/xen/common/efi/runtime.c b/xen/common/efi/runtime.c index 13b0975866..0d09647952 100644 --- a/xen/common/efi/runtime.c +++ b/xen/common/efi/runtime.c @@ -52,6 +52,7 @@ static unsigned int efi_rs_on_cpu = NR_CPUS; UINTN __read_mostly efi_memmap_size; UINTN __read_mostly efi_mdesc_size; void *__read_mostly efi_memmap; +const EFI_MEMORY_DESCRIPTOR *__read_mostly esrt_desc; UINT64 __read_mostly efi_boot_max_var_store_size; UINT64 __read_mostly efi_boot_remain_var_store_size; @@ -269,7 +270,7 @@ int efi_get_info(uint32_t idx, union xenpf_efi_info *info) case XEN_FW_EFI_MEM_INFO: for ( i = 0; i < efi_memmap_size; i += efi_mdesc_size ) { - EFI_MEMORY_DESCRIPTOR *desc = efi_memmap + i; + const EFI_MEMORY_DESCRIPTOR *desc = efi_memmap + i; u64 len = desc->NumberOfPages << EFI_PAGE_SHIFT; if ( info->mem.addr >= desc->PhysicalStart && diff --git a/xen/include/efi/efiapi.h b/xen/include/efi/efiapi.h index a616d1238a..42ef3e1c8c 100644 --- a/xen/include/efi/efiapi.h +++ b/xen/include/efi/efiapi.h @@ -882,6 +882,9 @@ typedef struct _EFI_BOOT_SERVICES { #define SAL_SYSTEM_TABLE_GUID \ { 0xeb9d2d32, 0x2d88, 0x11d3, {0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} } 
+#define ESRT_GUID \ + { 0xb122a263, 0x3661, 0x4f68, {0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80} } + typedef struct _EFI_CONFIGURATION_TABLE { EFI_GUID VendorGuid; -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
[-- Attachment #1: Type: text/plain, Size: 5386 bytes --] This allows the ESRT to be marked as reserved without having to waste a potentially large amount of memory. This patch assumes that Xen can handle memory regions that are not page-aligned. If it cannot, additional code will need to be added to align the regions. --- xen/arch/x86/efi/efi-boot.h | 69 +++++++++++++++++++++++++-------- xen/arch/x86/include/asm/e820.h | 2 +- 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h index 75937c8a11..edf1fea3e0 100644 --- a/xen/arch/x86/efi/efi-boot.h +++ b/xen/arch/x86/efi/efi-boot.h @@ -165,13 +165,14 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable, { EFI_MEMORY_DESCRIPTOR *desc = map + i; u64 len = desc->NumberOfPages << EFI_PAGE_SHIFT; + UINTN physical_start = desc->PhysicalStart; u32 type; switch ( desc->Type ) { case EfiBootServicesCode: case EfiBootServicesData: - if ( map_bs || desc == (EFI_MEMORY_DESCRIPTOR *)esrt_desc ) + if ( map_bs ) { default: type = E820_RESERVED; @@ -179,9 +180,9 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable, } /* fall through */ case EfiConventionalMemory: - if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 && - len >= cfg.size && desc->PhysicalStart + len > cfg.addr ) - cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK; + if ( !trampoline_phys && physical_start + len <= 0x100000 && + len >= cfg.size && physical_start + len > cfg.addr ) + cfg.addr = (physical_start + len - cfg.size) & PAGE_MASK; /* fall through */ case EfiLoaderCode: case EfiLoaderData: @@ -198,21 +199,57 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable, type = E820_NVS; break; } - if ( e820_raw.nr_map && type == e->type && - desc->PhysicalStart == e->addr + e->size ) - e->size += len; - else if ( !len || e820_raw.nr_map >= ARRAY_SIZE(e820_raw.map) ) - continue; - else + +#define 
ADD_ENTRY(len, type_, physical_start) \ + if ( len ) \ + { \ + if ( e820_raw.nr_map && (type_) == e->type && \ + (physical_start) == e->addr + e->size ) \ + e->size += (len); \ + else if ( e820_raw.nr_map < ARRAY_SIZE(e820_raw.map) ) \ + continue; \ + else \ + { \ + ++e; \ + e->addr = (physical_start); \ + e->size = (len); \ + e->type = (type_); \ + ++e820_raw.nr_map; \ + } \ + } \ + else \ + do {} while (0) + + if ( desc == (EFI_MEMORY_DESCRIPTOR *)esrt_desc ) { - ++e; - e->addr = desc->PhysicalStart; - e->size = len; - e->type = type; - ++e820_raw.nr_map; + const ESRT *esrt_ptr; + UINTN esrt_offset, esrt_len; + + BUG_ON(physical_start > esrt); + BUG_ON(len < sizeof(*esrt_ptr)); + esrt_offset = esrt - physical_start; + + BUG_ON(len - sizeof(*esrt_ptr) < esrt_offset); + esrt_ptr = (const ESRT *)esrt; + + BUG_ON(esrt_ptr->Version != 1); + BUG_ON(esrt_ptr->Count < 1); + + esrt_len = (esrt_ptr->Count + 1) * sizeof(*esrt_ptr); + + BUG_ON( len - esrt_offset < esrt_len ); + + ADD_ENTRY(esrt_offset, type, physical_start); + + ADD_ENTRY(esrt_len, E820_RESERVED, esrt); + + physical_start = esrt + esrt_len; + len -= esrt_offset + esrt_len; } - } + ADD_ENTRY(len, type, physical_start); + } +#undef ADD_ENTRY } static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size) diff --git a/xen/arch/x86/include/asm/e820.h b/xen/arch/x86/include/asm/e820.h index 92f5efa4f5..98eca96425 100644 --- a/xen/arch/x86/include/asm/e820.h +++ b/xen/arch/x86/include/asm/e820.h @@ -16,7 +16,7 @@ struct __packed e820entry { uint32_t type; }; -#define E820MAX 1024 +#define E820MAX 1026 struct e820map { unsigned int nr_map; -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
[-- Attachment #1: Type: text/plain, Size: 4362 bytes --] This hypercall can be used to get the ESRT from the hypervisor. It returning successfully also indicates that Xen has reserved the ESRT and it can safely be parsed by dom0. --- xen/common/efi/boot.c | 15 ++++++++++----- xen/common/efi/efi.h | 2 ++ xen/common/efi/runtime.c | 14 ++++++++++++++ xen/include/public/platform.h | 7 +++++++ 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c index 31664818c1..01b2409c5e 100644 --- a/xen/common/efi/boot.c +++ b/xen/common/efi/boot.c @@ -567,8 +567,6 @@ static int __init efi_check_dt_boot(const EFI_LOADED_IMAGE *loaded_image) } #endif -static UINTN __initdata esrt = EFI_INVALID_TABLE_ADDR; - static bool __init is_esrt_valid( const EFI_MEMORY_DESCRIPTOR *const desc) { @@ -594,9 +592,13 @@ static bool __init is_esrt_valid( esrt_ptr = (const ESRT *)esrt; if ( esrt_ptr->Version != 1 || !esrt_ptr->Count ) return false; - return esrt_ptr->Count <= - (available_len - sizeof(*esrt_ptr)) / - sizeof(esrt_ptr->Entries[0]); + if ( esrt_ptr->Count > + (available_len - sizeof(*esrt_ptr)) / + sizeof(esrt_ptr->Entries[0]) ) + return false; + esrt_size = sizeof(*esrt_ptr) + + esrt_ptr->Count * sizeof(esrt_ptr->Entries[0]); + return true; } /* @@ -1121,6 +1123,9 @@ static void __init efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *Syste } } + if ( esrt_desc == (const EFI_MEMORY_DESCRIPTOR *)EFI_INVALID_TABLE_ADDR ) + esrt = EFI_INVALID_TABLE_ADDR; + efi_arch_process_memory_map(SystemTable, efi_memmap, efi_memmap_size, efi_mdesc_size, mdesc_ver); diff --git a/xen/common/efi/efi.h b/xen/common/efi/efi.h index 02f499071a..0736662ebc 100644 --- a/xen/common/efi/efi.h +++ b/xen/common/efi/efi.h @@ -46,6 +46,8 @@ extern const EFI_RUNTIME_SERVICES *efi_rs; extern UINTN efi_memmap_size, efi_mdesc_size; extern void *efi_memmap; extern const EFI_MEMORY_DESCRIPTOR *esrt_desc; +extern UINTN esrt; +extern UINTN esrt_size; #ifdef 
CONFIG_X86 extern mfn_t efi_l4_mfn; diff --git a/xen/common/efi/runtime.c b/xen/common/efi/runtime.c index 0d09647952..4466d5379c 100644 --- a/xen/common/efi/runtime.c +++ b/xen/common/efi/runtime.c @@ -227,6 +227,12 @@ const CHAR16 *wmemchr(const CHAR16 *s, CHAR16 c, UINTN n) #endif /* COMPAT */ #ifndef CONFIG_ARM /* TODO - disabled until implemented on ARM */ + +#ifndef COMPAT +UINTN esrt = EFI_INVALID_TABLE_ADDR; +UINTN esrt_size = 0; +#endif + int efi_get_info(uint32_t idx, union xenpf_efi_info *info) { unsigned int i, n; @@ -311,6 +317,14 @@ int efi_get_info(uint32_t idx, union xenpf_efi_info *info) info->apple_properties.size = efi_apple_properties_len; break; + case XEN_FW_EFI_ESRT: + if ( esrt_desc == (const EFI_MEMORY_DESCRIPTOR *)EFI_INVALID_TABLE_ADDR ) + return -ENODATA; + if ( info->esrt.size < esrt_size ) + return -ERANGE; + if ( copy_to_guest(info->esrt.table, (const ESRT *)esrt, esrt_size) ) + return -EFAULT; + break; default: return -EINVAL; } diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h index 8100133509..a848df2066 100644 --- a/xen/include/public/platform.h +++ b/xen/include/public/platform.h @@ -243,6 +243,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t); #define XEN_FW_EFI_RT_VERSION 4 #define XEN_FW_EFI_PCI_ROM 5 #define XEN_FW_EFI_APPLE_PROPERTIES 6 +#define XEN_FW_EFI_ESRT 7 #define XEN_FW_KBD_SHIFT_FLAGS 5 struct xenpf_firmware_info { /* IN variables. */ @@ -307,6 +308,12 @@ struct xenpf_firmware_info { uint64_t address; xen_ulong_t size; } apple_properties; + struct { + /* IN variables */ + uint64_t size; + /* OUT variables */ + XEN_GUEST_HANDLE(void) table; + } esrt; } efi_info; /* XEN_FW_EFI_INFO */ /* Int16, Fn02: Get keyboard shift flags. */ -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1106 bytes --] These were very helpful for me when I was working on the code. --- xen/common/efi/boot.c | 10 ++++++++++ xen/common/efi/runtime.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c index 01b2409c5e..5415785bef 100644 --- a/xen/common/efi/boot.c +++ b/xen/common/efi/boot.c @@ -1802,3 +1802,13 @@ void __init efi_init_memory(void) unmap_domain_page(efi_l4t); } #endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/efi/runtime.c b/xen/common/efi/runtime.c index 4466d5379c..8021ecaecc 100644 --- a/xen/common/efi/runtime.c +++ b/xen/common/efi/runtime.c @@ -719,3 +719,13 @@ int efi_runtime_call(struct xenpf_efi_runtime_call *op) return rc; } #endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
On 19.04.2022 17:40, Demi Marie Obenour wrote: > --- a/xen/arch/x86/efi/efi-boot.h > +++ b/xen/arch/x86/efi/efi-boot.h > @@ -171,7 +171,7 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable, > { > case EfiBootServicesCode: > case EfiBootServicesData: > - if ( map_bs ) > + if ( map_bs || desc == (EFI_MEMORY_DESCRIPTOR *)esrt_desc ) No need for the cast afaics, even more so that it casts away const-ness. > --- a/xen/common/efi/boot.c > +++ b/xen/common/efi/boot.c > @@ -567,6 +567,38 @@ static int __init efi_check_dt_boot(const EFI_LOADED_IMAGE *loaded_image) > } > #endif > > +static UINTN __initdata esrt = EFI_INVALID_TABLE_ADDR; > + > +static bool __init is_esrt_valid( > + const EFI_MEMORY_DESCRIPTOR *const desc) As indicated elsewhere before, while we want to have const on pointed-to types whenever possible, the 2nd const here is unusual in our code base and hence would imo better be omitted. > @@ -1056,19 +1091,19 @@ static void __init efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *Syste > EFI_STATUS status; > UINTN info_size = 0, map_key; > bool retry; > -#ifdef CONFIG_EFI_SET_VIRTUAL_ADDRESS_MAP > unsigned int i; > -#endif > > efi_bs->GetMemoryMap(&info_size, NULL, &map_key, > &efi_mdesc_size, &mdesc_ver); > - info_size += 8 * efi_mdesc_size; > + info_size += 8 * (efi_mdesc_size + 1); What is this needed for? Does this perhaps belong into a later patch? 
> --- a/xen/common/efi/efi.h > +++ b/xen/common/efi/efi.h > @@ -10,6 +10,23 @@ > #include <xen/spinlock.h> > #include <asm/page.h> > > +typedef struct _ESRT_ENTRY { > + EFI_GUID FwClass; > + UINT32 FwType; > + UINT32 FwVersion; > + UINT32 FwLowestSupportedVersion; > + UINT32 FwCapsuleFlags; > + UINT32 FwLastAttemptVersion; > + UINT32 FwLastAttemptStatus; > +} ESRT_ENTRY; > + > +typedef struct _ESRT { > + UINT32 Count; > + UINT32 Max; > + UINT64 Version; > + ESRT_ENTRY Entries[]; > +} ESRT; The names in the spec, which (as said before) we're trying to follow along with the gnu-efi package, where we would generally be taking things from, are EFI_SYSTEM_RESOURCE_ENTRY and EFI_SYSTEM_RESOURCE_TABLE. The field names of the former also don't all start with "Fw". The field names of the latter are still quite far off of what the spec says. Also, why did you move this here? There's no need to expose things in a header which are used by a single CU. > @@ -269,7 +270,7 @@ int efi_get_info(uint32_t idx, union xenpf_efi_info *info) > case XEN_FW_EFI_MEM_INFO: > for ( i = 0; i < efi_memmap_size; i += efi_mdesc_size ) > { > - EFI_MEMORY_DESCRIPTOR *desc = efi_memmap + i; > + const EFI_MEMORY_DESCRIPTOR *desc = efi_memmap + i; While I don't mind this change, it also looks unrelated. Perhaps again needed by (and then supposed to be in) a later patch? Jan
On 19.04.2022 17:40, Demi Marie Obenour wrote: > This allows the ESRT to be marked as reserved without having to waste a > potentially large amount of memory. This patch assumes that Xen can > handle memory regions that are not page-aligned. If it cannot, > additional code will need to be added to align the regions. > --- This lacks an S-o-b and perhaps also a Suggested-by or Requested-by. As to the mentioned assumption, I'm of the opinion that you as the author would need to check whether the assumption holds or whether, as you say, more code needs to be added. Or else I think such a change would want tagging as RFC. > @@ -198,21 +199,57 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable, > type = E820_NVS; > break; > } > - if ( e820_raw.nr_map && type == e->type && > - desc->PhysicalStart == e->addr + e->size ) > - e->size += len; > - else if ( !len || e820_raw.nr_map >= ARRAY_SIZE(e820_raw.map) ) > - continue; > - else > + > +#define ADD_ENTRY(len, type_, physical_start) \ I think the order would be less unexpected as (start, len, type), especially when actually seeing the macro in use further down. > + if ( len ) \ > + { \ > + if ( e820_raw.nr_map && (type_) == e->type && \ > + (physical_start) == e->addr + e->size ) \ > + e->size += (len); \ > + else if ( e820_raw.nr_map < ARRAY_SIZE(e820_raw.map) ) \ > + continue; \ > + else \ > + { \ > + ++e; \ > + e->addr = (physical_start); \ > + e->size = (len); \ > + e->type = (type_); \ > + ++e820_raw.nr_map; \ > + } \ > + } \ > + else \ > + do {} while (0) This is odd to see. What we usually do in such cases is to enclose the whole construct in do { ... } while (0), or to convert the statement to an expression, by enclosing it in ({ }). 
> + if ( desc == (EFI_MEMORY_DESCRIPTOR *)esrt_desc ) > { > - ++e; > - e->addr = desc->PhysicalStart; > - e->size = len; > - e->type = type; > - ++e820_raw.nr_map; > + const ESRT *esrt_ptr; > + UINTN esrt_offset, esrt_len; > + > + BUG_ON(physical_start > esrt); > + BUG_ON(len < sizeof(*esrt_ptr)); > + esrt_offset = esrt - physical_start; > + > + BUG_ON(len - sizeof(*esrt_ptr) < esrt_offset); > + esrt_ptr = (const ESRT *)esrt; > + > + BUG_ON(esrt_ptr->Version != 1); > + BUG_ON(esrt_ptr->Count < 1); > + > + esrt_len = (esrt_ptr->Count + 1) * sizeof(*esrt_ptr); > + > + BUG_ON( len - esrt_offset < esrt_len ); Nit: Excess blanks immediately inside the parentheses. > --- a/xen/arch/x86/include/asm/e820.h > +++ b/xen/arch/x86/include/asm/e820.h > @@ -16,7 +16,7 @@ struct __packed e820entry { > uint32_t type; > }; > > -#define E820MAX 1024 > +#define E820MAX 1026 Why? Jan
On 19.04.2022 17:40, Demi Marie Obenour wrote:
> @@ -1056,19 +1091,19 @@ static void __init efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *Syste
> EFI_STATUS status;
> UINTN info_size = 0, map_key;
> bool retry;
> -#ifdef CONFIG_EFI_SET_VIRTUAL_ADDRESS_MAP
> unsigned int i;
> -#endif
>
> efi_bs->GetMemoryMap(&info_size, NULL, &map_key,
> &efi_mdesc_size, &mdesc_ver);
> - info_size += 8 * efi_mdesc_size;
> + info_size += 8 * (efi_mdesc_size + 1);
> efi_memmap = efi_arch_allocate_mmap_buffer(info_size);
> if ( !efi_memmap )
> blexit(L"Unable to allocate memory for EFI memory map");
>
> for ( retry = false; ; retry = true )
> {
> + esrt_desc = (const EFI_MEMORY_DESCRIPTOR *)EFI_INVALID_TABLE_ADDR;
Sorry, one more question here: Why is NULL not good enough?
Jan
On 19.04.2022 17:49, Demi Marie Obenour wrote:
> This hypercall can be used to get the ESRT from the hypervisor. A
> successful return also indicates that Xen has reserved the ESRT and that
> it can safely be parsed by dom0.
I'm not convinced of the need, and I view such an addition as inconsistent
with the original intentions. The pointer comes from the config table,
which Dom0 already has access to. All a Dom0 kernel may need to know in
addition is whether the range was properly reserved. This could be achieved
by splitting the EFI memory map entry in patch 2, instead of only splitting
the E820 derivation, as then XEN_FW_EFI_MEM_INFO can be used to find out
the range's type. Another way to find out would be for Dom0 to attempt to
map this area as MMIO, after first checking that no part of the range is in
its own memory allocation. This 2nd approach may, however, not really be
suitable for PVH Dom0, I think.
Jan
On 19.04.2022 17:40, Demi Marie Obenour wrote:
> --- a/xen/include/efi/efiapi.h
> +++ b/xen/include/efi/efiapi.h
> @@ -882,6 +882,9 @@ typedef struct _EFI_BOOT_SERVICES {
> #define SAL_SYSTEM_TABLE_GUID \
> { 0xeb9d2d32, 0x2d88, 0x11d3, {0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
>
> +#define ESRT_GUID \
> + { 0xb122a263, 0x3661, 0x4f68, {0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80} }
> +
I'm sorry, yet one more remark: This should go here only if the gnu-efi
package also has it there. Otherwise it should be added next to the
other GUIDs in efi/boot.c. This is to make updating of this header from
newer gnu-efi versions as straightforward as possible.
Also please once again use the name from the spec,
EFI_SYSTEM_RESOURCE_TABLE_GUID.
Jan
[-- Attachment #1: Type: text/plain, Size: 1803 bytes --] On Wed, Apr 27, 2022 at 10:56:34AM +0200, Jan Beulich wrote: > On 19.04.2022 17:49, Demi Marie Obenour wrote: > > This hypercall can be used to get the ESRT from the hypervisor. It > > returning successfully also indicates that Xen has reserved the ESRT and > > it can safely be parsed by dom0. > > I'm not convinced of the need, and I view such an addition as inconsistent > with the original intentions. The pointer comes from the config table, > which Dom0 already has access to. All a Dom0 kernel may need to know in > addition is whether the range was properly reserved. This could be achieved > by splitting the EFI memory map entry in patch 2, instead of only splitting > the E820 derivation, as then XEN_FW_EFI_MEM_INFO can be used to find out > the range's type. Another way to find out would be for Dom0 to attempt to > map this area as MMIO, after first checking that no part of the range is in > its own memory allocation. This 2nd approach may, however, not really be > suitable for PVH Dom0, I think. On further thought, I think the hypercall approach is actually better than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to return anything other than the actual firmware-provided memory information, and the current approach seems to require more and more special-casing of the ESRT, not to mention potentially wasting memory and splitting a potentially large memory region into two smaller ones. By copying the entire ESRT into memory owned by Xen, the logic becomes significantly simpler on both the Xen and dom0 sides. Is using ebmalloc() to allocate a copy of the ESRT a reasonable option? Is it possible that the ESRT is so large that this causes boot to fail? -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
On 27.04.2022 21:08, Demi Marie Obenour wrote: > On Wed, Apr 27, 2022 at 10:56:34AM +0200, Jan Beulich wrote: >> On 19.04.2022 17:49, Demi Marie Obenour wrote: >>> This hypercall can be used to get the ESRT from the hypervisor. It >>> returning successfully also indicates that Xen has reserved the ESRT and >>> it can safely be parsed by dom0. >> >> I'm not convinced of the need, and I view such an addition as inconsistent >> with the original intentions. The pointer comes from the config table, >> which Dom0 already has access to. All a Dom0 kernel may need to know in >> addition is whether the range was properly reserved. This could be achieved >> by splitting the EFI memory map entry in patch 2, instead of only splitting >> the E820 derivation, as then XEN_FW_EFI_MEM_INFO can be used to find out >> the range's type. Another way to find out would be for Dom0 to attempt to >> map this area as MMIO, after first checking that no part of the range is in >> its own memory allocation. This 2nd approach may, however, not really be >> suitable for PVH Dom0, I think. > > On further thought, I think the hypercall approach is actually better > than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to > return anything other than the actual firmware-provided memory > information, and the current approach seems to require more and more > special-casing of the ESRT, not to mention potentially wasting memory > and splitting a potentially large memory region into two smaller ones. > By copying the entire ESRT into memory owned by Xen, the logic becomes > significantly simpler on both the Xen and dom0 sides. I actually did consider the option of making a private copy when you did send the initial version of this, but I'm not convinced this simplifies things from a kernel perspective: They'd now need to discover the table by some entirely different means. In Linux at least such divergence "just for Xen" hasn't been liked in the past. 
There's also the question of how to propagate the information across kexec. But I guess that question exists even outside of Xen, with the area living in memory which the OS is expected to recycle. > Is using ebmalloc() to allocate a copy of the ESRT a reasonable option? I'd suggest to try hard to avoid ebmalloc(). It ought to be possible to make the copy before ExitBootServices(), via normal EFI allocation. If replacing a pointer in the config table was okay(ish), this could even be utilized to overcome the kexec problem. > Is it possible that the ESRT is so large that this causes boot to fail? I don't know - that's a question firmware folks would need to answer. Jan
[-- Attachment #1: Type: text/plain, Size: 3607 bytes --] On Thu, Apr 28, 2022 at 08:47:49AM +0200, Jan Beulich wrote: > On 27.04.2022 21:08, Demi Marie Obenour wrote: > > On Wed, Apr 27, 2022 at 10:56:34AM +0200, Jan Beulich wrote: > >> On 19.04.2022 17:49, Demi Marie Obenour wrote: > >>> This hypercall can be used to get the ESRT from the hypervisor. It > >>> returning successfully also indicates that Xen has reserved the ESRT and > >>> it can safely be parsed by dom0. > >> > >> I'm not convinced of the need, and I view such an addition as inconsistent > >> with the original intentions. The pointer comes from the config table, > >> which Dom0 already has access to. All a Dom0 kernel may need to know in > >> addition is whether the range was properly reserved. This could be achieved > >> by splitting the EFI memory map entry in patch 2, instead of only splitting > >> the E820 derivation, as then XEN_FW_EFI_MEM_INFO can be used to find out > >> the range's type. Another way to find out would be for Dom0 to attempt to > >> map this area as MMIO, after first checking that no part of the range is in > >> its own memory allocation. This 2nd approach may, however, not really be > >> suitable for PVH Dom0, I think. > > > > On further thought, I think the hypercall approach is actually better > > than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to > > return anything other than the actual firmware-provided memory > > information, and the current approach seems to require more and more > > special-casing of the ESRT, not to mention potentially wasting memory > > and splitting a potentially large memory region into two smaller ones. > > By copying the entire ESRT into memory owned by Xen, the logic becomes > > significantly simpler on both the Xen and dom0 sides. 
> > I actually did consider the option of making a private copy when you did > send the initial version of this, but I'm not convinced this simplifies > things from a kernel perspective: They'd now need to discover the table > by some entirely different means. In Linux at least such divergence > "just for Xen" hasn't been liked in the past. > > There's also the question of how to propagate the information across > kexec. But I guess that question exists even outside of Xen, with the > area living in memory which the OS is expected to recycle. Indeed it does. A simple rule might be, “Only trust the ESRT if it is in memory of type EfiRuntimeServicesData.” That is easy to achieve by monkeypatching the config table as you suggested below. I *am* worried that the config table might be mapped read-only on some systems, in which case the overwrite would cause a fatal page fault. Is there a way for Xen to check for this? It could also be undefined behavior to modify it. > > Is using ebmalloc() to allocate a copy of the ESRT a reasonable option? > > I'd suggest to try hard to avoid ebmalloc(). It ought to be possible to > make the copy before ExitBootServices(), via normal EFI allocation. If > replacing a pointer in the config table was okay(ish), this could even > be utilized to overcome the kexec problem. What type should I use for the allocation? EfiLoaderData looks like the most consistent choice, but I am not sure if memory so allocated remains valid when Xen hands off to the OS, so EfiRuntimeServicesData might be a better choice. To avoid memory leaks from repeated kexec(), this could be made conditional on the ESRT not being in memory of type EfiRuntimeServicesData to begin with. -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
On 29.04.2022 00:54, Demi Marie Obenour wrote: > On Thu, Apr 28, 2022 at 08:47:49AM +0200, Jan Beulich wrote: >> On 27.04.2022 21:08, Demi Marie Obenour wrote: >>> On Wed, Apr 27, 2022 at 10:56:34AM +0200, Jan Beulich wrote: >>>> On 19.04.2022 17:49, Demi Marie Obenour wrote: >>>>> This hypercall can be used to get the ESRT from the hypervisor. It >>>>> returning successfully also indicates that Xen has reserved the ESRT and >>>>> it can safely be parsed by dom0. >>>> >>>> I'm not convinced of the need, and I view such an addition as inconsistent >>>> with the original intentions. The pointer comes from the config table, >>>> which Dom0 already has access to. All a Dom0 kernel may need to know in >>>> addition is whether the range was properly reserved. This could be achieved >>>> by splitting the EFI memory map entry in patch 2, instead of only splitting >>>> the E820 derivation, as then XEN_FW_EFI_MEM_INFO can be used to find out >>>> the range's type. Another way to find out would be for Dom0 to attempt to >>>> map this area as MMIO, after first checking that no part of the range is in >>>> its own memory allocation. This 2nd approach may, however, not really be >>>> suitable for PVH Dom0, I think. >>> >>> On further thought, I think the hypercall approach is actually better >>> than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to >>> return anything other than the actual firmware-provided memory >>> information, and the current approach seems to require more and more >>> special-casing of the ESRT, not to mention potentially wasting memory >>> and splitting a potentially large memory region into two smaller ones. >>> By copying the entire ESRT into memory owned by Xen, the logic becomes >>> significantly simpler on both the Xen and dom0 sides. 
>> >> I actually did consider the option of making a private copy when you did >> send the initial version of this, but I'm not convinced this simplifies >> things from a kernel perspective: They'd now need to discover the table >> by some entirely different means. In Linux at least such divergence >> "just for Xen" hasn't been liked in the past. >> >> There's also the question of how to propagate the information across >> kexec. But I guess that question exists even outside of Xen, with the >> area living in memory which the OS is expected to recycle. > > Indeed it does. A simple rule might be, “Only trust the ESRT if it is > in memory of type EfiRuntimeServicesData.” That is easy to achieve by > monkeypatching the config table as you suggested below. > > I *am* worried that the config table might be mapped read-only on some > systems, in which case the overwrite would cause a fatal page fault. Is > there a way for Xen to check for this? While in boot mode, aiui page tables aren't supposed to be enforcing access restrictions. Recall that on other architectures EFI even runs with paging disabled; this simply is not possible for x86-64. So portable firmware shouldn't map anything r/o. In principle the pointer could still be in ROM; I consider this unlikely, but we could check for that (just like we could do a page table walk to figure out whether a r/o mapping would prevent us from updating the field). > It could also be undefined behavior to modify it. That's the bigger worry I have. >>> Is using ebmalloc() to allocate a copy of the ESRT a reasonable option? >> >> I'd suggest to try hard to avoid ebmalloc(). It ought to be possible to >> make the copy before ExitBootServices(), via normal EFI allocation. If >> replacing a pointer in the config table was okay(ish), this could even >> be utilized to overcome the kexec problem. > > What type should I use for the allocation? 
EfiLoaderData looks like the > most consistent choice, but I am not sure if memory so allocated remains > valid when Xen hands off to the OS, so EfiRuntimeServicesData might be a > better choice. It definitely is. We do recycle EfiLoaderData ourselves. > To avoid memory leaks from repeated kexec(), this could > be made conditional on the ESRT not being in memory of type > EfiRuntimeServicesData to begin with. Of course - there's no point relocating the blob when it already is immune to recycling. Jan
[-- Attachment #1: Type: text/plain, Size: 5625 bytes --] On Fri, Apr 29, 2022 at 10:40:42AM +0200, Jan Beulich wrote: > On 29.04.2022 00:54, Demi Marie Obenour wrote: > > On Thu, Apr 28, 2022 at 08:47:49AM +0200, Jan Beulich wrote: > >> On 27.04.2022 21:08, Demi Marie Obenour wrote: > >>> On Wed, Apr 27, 2022 at 10:56:34AM +0200, Jan Beulich wrote: > >>>> On 19.04.2022 17:49, Demi Marie Obenour wrote: > >>>>> This hypercall can be used to get the ESRT from the hypervisor. It > >>>>> returning successfully also indicates that Xen has reserved the ESRT and > >>>>> it can safely be parsed by dom0. > >>>> > >>>> I'm not convinced of the need, and I view such an addition as inconsistent > >>>> with the original intentions. The pointer comes from the config table, > >>>> which Dom0 already has access to. All a Dom0 kernel may need to know in > >>>> addition is whether the range was properly reserved. This could be achieved > >>>> by splitting the EFI memory map entry in patch 2, instead of only splitting > >>>> the E820 derivation, as then XEN_FW_EFI_MEM_INFO can be used to find out > >>>> the range's type. Another way to find out would be for Dom0 to attempt to > >>>> map this area as MMIO, after first checking that no part of the range is in > >>>> its own memory allocation. This 2nd approach may, however, not really be > >>>> suitable for PVH Dom0, I think. > >>> > >>> On further thought, I think the hypercall approach is actually better > >>> than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to > >>> return anything other than the actual firmware-provided memory > >>> information, and the current approach seems to require more and more > >>> special-casing of the ESRT, not to mention potentially wasting memory > >>> and splitting a potentially large memory region into two smaller ones. > >>> By copying the entire ESRT into memory owned by Xen, the logic becomes > >>> significantly simpler on both the Xen and dom0 sides. 
> >> > >> I actually did consider the option of making a private copy when you did > >> send the initial version of this, but I'm not convinced this simplifies > >> things from a kernel perspective: They'd now need to discover the table > >> by some entirely different means. In Linux at least such divergence > >> "just for Xen" hasn't been liked in the past. > >> > >> There's also the question of how to propagate the information across > >> kexec. But I guess that question exists even outside of Xen, with the > >> area living in memory which the OS is expected to recycle. > > > > Indeed it does. A simple rule might be, “Only trust the ESRT if it is > > in memory of type EfiRuntimeServicesData.” That is easy to achieve by > > monkeypatching the config table as you suggested below. > > > > I *am* worried that the config table might be mapped read-only on some > > systems, in which case the overwrite would cause a fatal page fault. Is > > there a way for Xen to check for this? > > While in boot mode, aiui page tables aren't supposed to be enforcing > access restrictions. Recall that on other architectures EFI even runs > with paging disabled; this simply is not possible for x86-64. Yikes! No wonder firmware has nonexistent exploit mitigations. They really ought to start porting UEFI to Rust, with ASLR, NX, stack canaries, a hardened allocator, and support for de-privileged services that run in user mode. That reminds me: Can Xen itself run from ROM? Xen is being ported to POWER for use in Qubes OS, and one approach under consideration is to have Xen and a mini-dom0 be part of the firmware. Personally, I really like this approach, as it makes untrusted storage domains much simpler. If this should be a separate email thread, let me know. > So > portable firmware shouldn't map anything r/o. 
In principle the pointer > could still be in ROM; I consider this unlikely, but we could check > for that (just like we could do a page table walk to figure out > whether a r/o mapping would prevent us from updating the field). Is there a utility function that could be used for this? > > It could also be undefined behavior to modify it. > > That's the bigger worry I have. Turns out that it is *not* undefined behavior, so long as ExitBootServices() has not been called. This is because EFI drivers will modify the config table, so firmware cannot assume it to be read-only. > >>> Is using ebmalloc() to allocate a copy of the ESRT a reasonable option? > >> > >> I'd suggest to try hard to avoid ebmalloc(). It ought to be possible to > >> make the copy before ExitBootServices(), via normal EFI allocation. If > >> replacing a pointer in the config table was okay(ish), this could even > >> be utilized to overcome the kexec problem. > > > > What type should I use for the allocation? EfiLoaderData looks like the > > most consistent choice, but I am not sure if memory so allocated remains > > valid when Xen hands off to the OS, so EfiRuntimeServicesData might be a > > better choice. > > It definitely is. We do recycle EfiLoaderData ourselves. I wonder why the ESRT was not in EfiRuntimeServicesData to begin with. > > To avoid memory leaks from repeated kexec(), this could > > be made conditional on the ESRT not being in memory of type > > EfiRuntimeServicesData to begin with. > > Of course - there's no point relocating the blob when it already is > immune to recycling. Yup. Is it reasonable for dom0 to check that the ESRT is in EfiRuntimeServicesData when under Xen? -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
On 29.04.2022 19:06, Demi Marie Obenour wrote: > On Fri, Apr 29, 2022 at 10:40:42AM +0200, Jan Beulich wrote: >> On 29.04.2022 00:54, Demi Marie Obenour wrote: >>> On Thu, Apr 28, 2022 at 08:47:49AM +0200, Jan Beulich wrote: >>>> On 27.04.2022 21:08, Demi Marie Obenour wrote: >>>>> On further thought, I think the hypercall approach is actually better >>>>> than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to >>>>> return anything other than the actual firmware-provided memory >>>>> information, and the current approach seems to require more and more >>>>> special-casing of the ESRT, not to mention potentially wasting memory >>>>> and splitting a potentially large memory region into two smaller ones. >>>>> By copying the entire ESRT into memory owned by Xen, the logic becomes >>>>> significantly simpler on both the Xen and dom0 sides. >>>> >>>> I actually did consider the option of making a private copy when you did >>>> send the initial version of this, but I'm not convinced this simplifies >>>> things from a kernel perspective: They'd now need to discover the table >>>> by some entirely different means. In Linux at least such divergence >>>> "just for Xen" hasn't been liked in the past. >>>> >>>> There's also the question of how to propagate the information across >>>> kexec. But I guess that question exists even outside of Xen, with the >>>> area living in memory which the OS is expected to recycle. >>> >>> Indeed it does. A simple rule might be, “Only trust the ESRT if it is >>> in memory of type EfiRuntimeServicesData.” That is easy to achieve by >>> monkeypatching the config table as you suggested below. >>> >>> I *am* worried that the config table might be mapped read-only on some >>> systems, in which case the overwrite would cause a fatal page fault. Is >>> there a way for Xen to check for this? >> >> While in boot mode, aiui page tables aren't supposed to be enforcing >> access restrictions. 
Recall that on other architectures EFI even runs >> with paging disabled; this simply is not possible for x86-64. > > Yikes! No wonder firmware has nonexistent exploit mitigations. They > really ought to start porting UEFI to Rust, with ASLR, NX, stack > canaries, a hardened allocator, and support for de-priviliged services > that run in user mode. > > That reminds me: Can Xen itself run from ROM? I guess that could be possible in principle, but would certainly require some work. > Xen is being ported to > POWER for use in Qubes OS, and one approach under consideration is to > have Xen and a mini-dom0 be part of the firmware. Personally, I really > like this approach, as it makes untrusted storage domains much simpler. > If this should be a separate email thread, let me know. It probably should be. >> So >> portable firmware shouldn't map anything r/o. In principle the pointer >> could still be in ROM; I consider this unlikely, but we could check >> for that (just like we could do a page table walk to figure out >> whether a r/o mapping would prevent us from updating the field). > > Is there a utility function that could be used for this? I don't think there is. >>> It could also be undefined behavior to modify it. >> >> That's the bigger worry I have. > > Turns out that it is *not* undefined behavior, so long as > ExitBootServices() has not been called. This is becaues EFI drivers > will modify the config table, so firmware cannot assume it to be > read-only. Ah, right - we could even use InstallConfigurationTable() ourselves to make the adjustment. >>>>> Is using ebmalloc() to allocate a copy of the ESRT a reasonable option? >>>> >>>> I'd suggest to try hard to avoid ebmalloc(). It ought to be possible to >>>> make the copy before ExitBootServices(), via normal EFI allocation. If >>>> replacing a pointer in the config table was okay(ish), this could even >>>> be utilized to overcome the kexec problem. >>> >>> What type should I use for the allocation? 
EfiLoaderData looks like the >>> most consistent choice, but I am not sure if memory so allocated remains >>> valid when Xen hands off to the OS, so EfiRuntimeServicesData might be a >>> better choice. >> >> It definitely is. We do recycle EfiLoaderData ourselves. > > I wonder why the ESRT was not in EfiRuntimeServicesData to begin with. So do I. >>> To avoid memory leaks from repeated kexec(), this could >>> be made conditional on the ESRT not being in memory of type >>> EfiRuntimeServicesData to begin with. >> >> Of course - there's no point relocating the blob when it already is >> immune to recycling. > > Yup. Is it reasonable for dom0 to check that the ESRT is in > EfiRuntimeServicesData when under Xen? I think it is, but kernel folks may not like Xen specific code in this (or about any) area. Jan
[-- Attachment #1: Type: text/plain, Size: 5818 bytes --] On Mon, May 02, 2022 at 08:24:30AM +0200, Jan Beulich wrote: > On 29.04.2022 19:06, Demi Marie Obenour wrote: > > On Fri, Apr 29, 2022 at 10:40:42AM +0200, Jan Beulich wrote: > >> On 29.04.2022 00:54, Demi Marie Obenour wrote: > >>> On Thu, Apr 28, 2022 at 08:47:49AM +0200, Jan Beulich wrote: > >>>> On 27.04.2022 21:08, Demi Marie Obenour wrote: > >>>>> On further thought, I think the hypercall approach is actually better > >>>>> than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to > >>>>> return anything other than the actual firmware-provided memory > >>>>> information, and the current approach seems to require more and more > >>>>> special-casing of the ESRT, not to mention potentially wasting memory > >>>>> and splitting a potentially large memory region into two smaller ones. > >>>>> By copying the entire ESRT into memory owned by Xen, the logic becomes > >>>>> significantly simpler on both the Xen and dom0 sides. > >>>> > >>>> I actually did consider the option of making a private copy when you did > >>>> send the initial version of this, but I'm not convinced this simplifies > >>>> things from a kernel perspective: They'd now need to discover the table > >>>> by some entirely different means. In Linux at least such divergence > >>>> "just for Xen" hasn't been liked in the past. > >>>> > >>>> There's also the question of how to propagate the information across > >>>> kexec. But I guess that question exists even outside of Xen, with the > >>>> area living in memory which the OS is expected to recycle. > >>> > >>> Indeed it does. A simple rule might be, “Only trust the ESRT if it is > >>> in memory of type EfiRuntimeServicesData.” That is easy to achieve by > >>> monkeypatching the config table as you suggested below. > >>> > >>> I *am* worried that the config table might be mapped read-only on some > >>> systems, in which case the overwrite would cause a fatal page fault. 
Is > >>> there a way for Xen to check for this? > >> > >> While in boot mode, aiui page tables aren't supposed to be enforcing > >> access restrictions. Recall that on other architectures EFI even runs > >> with paging disabled; this simply is not possible for x86-64. > > > > Yikes! No wonder firmware has nonexistent exploit mitigations. They > > really ought to start porting UEFI to Rust, with ASLR, NX, stack > > canaries, a hardened allocator, and support for de-priviliged services > > that run in user mode. > > > > That reminds me: Can Xen itself run from ROM? > > I guess that could be possible in principle, but would certainly require > some work. > > > Xen is being ported to > > POWER for use in Qubes OS, and one approach under consideration is to > > have Xen and a mini-dom0 be part of the firmware. Personally, I really > > like this approach, as it makes untrusted storage domains much simpler. > > If this should be a separate email thread, let me know. > > It probably should be. I will make one at some point. > >> So > >> portable firmware shouldn't map anything r/o. In principle the pointer > >> could still be in ROM; I consider this unlikely, but we could check > >> for that (just like we could do a page table walk to figure out > >> whether a r/o mapping would prevent us from updating the field). > > > > Is there a utility function that could be used for this? > > I don't think there is. Then it is good that none is necessary :) Also, should the various bug checks I added be replaced by ASSERT()? > >>> It could also be undefined behavior to modify it. > >> > >> That's the bigger worry I have. > > > > Turns out that it is *not* undefined behavior, so long as > > ExitBootServices() has not been called. This is becaues EFI drivers > > will modify the config table, so firmware cannot assume it to be > > read-only. > > Ah, right - we could even use InstallConfigurationTable() ourselves > to make the adjustment. That is even simpler than I thought! 
I was worried that InstallConfigurationTable() would assume that memory for the table was allocated a certain way and cause invalid free errors, but at least TianoCore does not do that. > >>>>> Is using ebmalloc() to allocate a copy of the ESRT a reasonable option? > >>>> > >>>> I'd suggest to try hard to avoid ebmalloc(). It ought to be possible to > >>>> make the copy before ExitBootServices(), via normal EFI allocation. If > >>>> replacing a pointer in the config table was okay(ish), this could even > >>>> be utilized to overcome the kexec problem. > >>> > >>> What type should I use for the allocation? EfiLoaderData looks like the > >>> most consistent choice, but I am not sure if memory so allocated remains > >>> valid when Xen hands off to the OS, so EfiRuntimeServicesData might be a > >>> better choice. > >> > >> It definitely is. We do recycle EfiLoaderData ourselves. > > > > I wonder why the ESRT was not in EfiRuntimeServicesData to begin with. > > So do I. I suspect the assumption was that the ESRT would be parsed by the OS before ExitBootServices(), and that the OS would have no need for the ESRT after that. > >>> To avoid memory leaks from repeated kexec(), this could > >>> be made conditional on the ESRT not being in memory of type > >>> EfiRuntimeServicesData to begin with. > >> > >> Of course - there's no point relocating the blob when it already is > >> immune to recycling. > > > > Yup. Is it reasonable for dom0 to check that the ESRT is in > > EfiRuntimeServicesData when under Xen? > > I think it is, but kernel folks may not like Xen specific code in this > (or about any) area. > > Jan There is PVops et al already :) -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
On 02.05.2022 09:11, Demi Marie Obenour wrote:
> On Mon, May 02, 2022 at 08:24:30AM +0200, Jan Beulich wrote:
>> On 29.04.2022 19:06, Demi Marie Obenour wrote:
>>> On Fri, Apr 29, 2022 at 10:40:42AM +0200, Jan Beulich wrote:
>>>> On 29.04.2022 00:54, Demi Marie Obenour wrote:
>>>>> On Thu, Apr 28, 2022 at 08:47:49AM +0200, Jan Beulich wrote:
>>>>>> On 27.04.2022 21:08, Demi Marie Obenour wrote:
>>>>>>> On further thought, I think the hypercall approach is actually better
>>>>>>> than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to
>>>>>>> return anything other than the actual firmware-provided memory
>>>>>>> information, and the current approach seems to require more and more
>>>>>>> special-casing of the ESRT, not to mention potentially wasting memory
>>>>>>> and splitting a potentially large memory region into two smaller ones.
>>>>>>> By copying the entire ESRT into memory owned by Xen, the logic becomes
>>>>>>> significantly simpler on both the Xen and dom0 sides.
>>>>>>
>>>>>> I actually did consider the option of making a private copy when you did
>>>>>> send the initial version of this, but I'm not convinced this simplifies
>>>>>> things from a kernel perspective: They'd now need to discover the table
>>>>>> by some entirely different means. In Linux at least such divergence
>>>>>> "just for Xen" hasn't been liked in the past.
>>>>>>
>>>>>> There's also the question of how to propagate the information across
>>>>>> kexec. But I guess that question exists even outside of Xen, with the
>>>>>> area living in memory which the OS is expected to recycle.
>>>>>
>>>>> Indeed it does. A simple rule might be, “Only trust the ESRT if it is
>>>>> in memory of type EfiRuntimeServicesData.” That is easy to achieve by
>>>>> monkeypatching the config table as you suggested below.
>>>>>
>>>>> I *am* worried that the config table might be mapped read-only on some
>>>>> systems, in which case the overwrite would cause a fatal page fault. Is
>>>>> there a way for Xen to check for this?
>>>>
>>>> While in boot mode, aiui page tables aren't supposed to be enforcing
>>>> access restrictions. Recall that on other architectures EFI even runs
>>>> with paging disabled; this simply is not possible for x86-64.
>>>
>>> Yikes! No wonder firmware has nonexistent exploit mitigations. They
>>> really ought to start porting UEFI to Rust, with ASLR, NX, stack
>>> canaries, a hardened allocator, and support for de-privileged services
>>> that run in user mode.
>>>
>>> That reminds me: Can Xen itself run from ROM?
>>
>> I guess that could be possible in principle, but would certainly require
>> some work.
>>
>>> Xen is being ported to
>>> POWER for use in Qubes OS, and one approach under consideration is to
>>> have Xen and a mini-dom0 be part of the firmware. Personally, I really
>>> like this approach, as it makes untrusted storage domains much simpler.
>>> If this should be a separate email thread, let me know.
>>
>> It probably should be.
>
> I will make one at some point.
>
>>>> So
>>>> portable firmware shouldn't map anything r/o. In principle the pointer
>>>> could still be in ROM; I consider this unlikely, but we could check
>>>> for that (just like we could do a page table walk to figure out
>>>> whether a r/o mapping would prevent us from updating the field).
>>>
>>> Is there a utility function that could be used for this?
>>
>> I don't think there is.
>
> Then it is good that none is necessary :)
>
> Also, should the various bug checks I added be replaced by ASSERT()?
You mean those in the earlier patch(es)? Not sure - depends on what you
would be doing for release builds. In the cases where you simply re-
check what was checked earlier on, ASSERT() would probably indeed be
preferable over BUG_ON() (and there I wouldn't even see a strong need
to consider alternatives for release builds).
Jan
[-- Attachment #1: Type: text/plain, Size: 4329 bytes --] On Mon, May 02, 2022 at 09:37:39AM +0200, Jan Beulich wrote: > On 02.05.2022 09:11, Demi Marie Obenour wrote: > > On Mon, May 02, 2022 at 08:24:30AM +0200, Jan Beulich wrote: > >> On 29.04.2022 19:06, Demi Marie Obenour wrote: > >>> On Fri, Apr 29, 2022 at 10:40:42AM +0200, Jan Beulich wrote: > >>>> On 29.04.2022 00:54, Demi Marie Obenour wrote: > >>>>> On Thu, Apr 28, 2022 at 08:47:49AM +0200, Jan Beulich wrote: > >>>>>> On 27.04.2022 21:08, Demi Marie Obenour wrote: > >>>>>>> On further thought, I think the hypercall approach is actually better > >>>>>>> than reserving the ESRT. I really do not want XEN_FW_EFI_MEM_INFO to > >>>>>>> return anything other than the actual firmware-provided memory > >>>>>>> information, and the current approach seems to require more and more > >>>>>>> special-casing of the ESRT, not to mention potentially wasting memory > >>>>>>> and splitting a potentially large memory region into two smaller ones. > >>>>>>> By copying the entire ESRT into memory owned by Xen, the logic becomes > >>>>>>> significantly simpler on both the Xen and dom0 sides. > >>>>>> > >>>>>> I actually did consider the option of making a private copy when you did > >>>>>> send the initial version of this, but I'm not convinced this simplifies > >>>>>> things from a kernel perspective: They'd now need to discover the table > >>>>>> by some entirely different means. In Linux at least such divergence > >>>>>> "just for Xen" hasn't been liked in the past. > >>>>>> > >>>>>> There's also the question of how to propagate the information across > >>>>>> kexec. But I guess that question exists even outside of Xen, with the > >>>>>> area living in memory which the OS is expected to recycle. > >>>>> > >>>>> Indeed it does. A simple rule might be, “Only trust the ESRT if it is > >>>>> in memory of type EfiRuntimeServicesData.” That is easy to achieve by > >>>>> monkeypatching the config table as you suggested below. 
> >>>>> > >>>>> I *am* worried that the config table might be mapped read-only on some > >>>>> systems, in which case the overwrite would cause a fatal page fault. Is > >>>>> there a way for Xen to check for this? > >>>> > >>>> While in boot mode, aiui page tables aren't supposed to be enforcing > >>>> access restrictions. Recall that on other architectures EFI even runs > >>>> with paging disabled; this simply is not possible for x86-64. > >>> > >>> Yikes! No wonder firmware has nonexistent exploit mitigations. They > >>> really ought to start porting UEFI to Rust, with ASLR, NX, stack > >>> canaries, a hardened allocator, and support for de-privileged services > >>> that run in user mode. > >>> > >>> That reminds me: Can Xen itself run from ROM? > >> > >> I guess that could be possible in principle, but would certainly require > >> some work. > >> > >>> Xen is being ported to > >>> POWER for use in Qubes OS, and one approach under consideration is to > >>> have Xen and a mini-dom0 be part of the firmware. Personally, I really > >>> like this approach, as it makes untrusted storage domains much simpler. > >>> If this should be a separate email thread, let me know. > >> > >> It probably should be. > > > > I will make one at some point. > > > >>>> So > >>>> portable firmware shouldn't map anything r/o. In principle the pointer > >>>> could still be in ROM; I consider this unlikely, but we could check > >>>> for that (just like we could do a page table walk to figure out > >>>> whether a r/o mapping would prevent us from updating the field). > >>> > >>> Is there a utility function that could be used for this? > >> > >> I don't think there is. > > > > Then it is good that none is necessary :) > > > > Also, should the various bug checks I added be replaced by ASSERT()? > > You mean those in the earlier patch(es)? Not sure - depends on what you > would be doing for release builds. 
In the cases where you simply re- > check what was checked earlier on, ASSERT() would probably indeed be > preferable over BUG_ON() (and there I wouldn't even see a strong need > to consider alternatives for release builds). Yup, that’s what the BUG_ON()s were for. I will use ASSERT() in the next round. -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
Hi, It seems that this series is stale for more than one month with maintainers comments given for [1][2] and some discussions between maintainer and author for [3]. So this email is a gentle reminder for the author about this series (no hurries and pressure though, please take your time :) ). Thanks! Kind regards, Henry [1] https://patchwork.kernel.org/project/xen-devel/patch/Yl7X3mAJhR5ENSpl@itl-email/ [2] https://patchwork.kernel.org/project/xen-devel/patch/Yl7X/dT39vvhZmho@itl-email/ [3] https://patchwork.kernel.org/project/xen-devel/patch/Yl7aC2a+TtOaFtqZ@itl-email/ > -----Original Message----- > On 19.04.2022 17:40, Demi Marie Obenour wrote: > > @@ -1056,19 +1091,19 @@ static void __init efi_exit_boot(EFI_HANDLE > ImageHandle, EFI_SYSTEM_TABLE *Syste > > EFI_STATUS status; > > UINTN info_size = 0, map_key; > > bool retry; > > -#ifdef CONFIG_EFI_SET_VIRTUAL_ADDRESS_MAP > > unsigned int i; > > -#endif > > > > efi_bs->GetMemoryMap(&info_size, NULL, &map_key, > > &efi_mdesc_size, &mdesc_ver); > > - info_size += 8 * efi_mdesc_size; > > + info_size += 8 * (efi_mdesc_size + 1); > > efi_memmap = efi_arch_allocate_mmap_buffer(info_size); > > if ( !efi_memmap ) > > blexit(L"Unable to allocate memory for EFI memory map"); > > > > for ( retry = false; ; retry = true ) > > { > > + esrt_desc = (const EFI_MEMORY_DESCRIPTOR > *)EFI_INVALID_TABLE_ADDR; > > Sorry, one more question here: Why is NULL not good enough? > > Jan >
[-- Attachment #1: Type: text/plain, Size: 522 bytes --] On Mon, May 30, 2022 at 08:47:39AM +0000, Henry Wang wrote: > Hi, > > It seems that this series is stale for more than one month with maintainers > comments given for [1][2] and some discussions between maintainer and author > for [3]. So this email is a gentle reminder for the author about this series (no hurries > and pressure though, please take your time :) ). Thanks! Thanks for the reminder. This series has been superseded by a later one. -- Sincerely, Demi Marie Obenour (she/her/hers) Invisible Things Lab [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]