All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-efi@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Ingo Molnar <mingo@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCH v9 16/23] x86/efistub: Perform 4/5 level paging switch from the stub
Date: Mon,  7 Aug 2023 18:27:13 +0200	[thread overview]
Message-ID: <20230807162720.545787-17-ardb@kernel.org> (raw)
In-Reply-To: <20230807162720.545787-1-ardb@kernel.org>

In preparation for updating the EFI stub boot flow to avoid the bare
metal decompressor code altogether, implement the support code for
switching between 4 and 5 levels of paging before jumping to the kernel
proper.

This reuses the newly refactored trampoline that the bare metal
decompressor uses, but relies on EFI APIs to allocate 32-bit addressable
memory and remap it with the appropriate permissions. Given that the
bare metal decompressor will no longer call into the trampoline if the
number of paging levels is already set correctly, it is no longer needed
to remove NX restrictions from the memory range where this trampoline
may end up.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/libstub/Makefile          |  1 +
 drivers/firmware/efi/libstub/efi-stub-helper.c |  2 +
 drivers/firmware/efi/libstub/efistub.h         |  1 +
 drivers/firmware/efi/libstub/x86-5lvl.c        | 95 ++++++++++++++++++++
 drivers/firmware/efi/libstub/x86-stub.c        | 40 +++------
 drivers/firmware/efi/libstub/x86-stub.h        | 17 ++++
 6 files changed, 130 insertions(+), 26 deletions(-)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 16d64a34d1e19465..ae8874401a9f1490 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -88,6 +88,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB)	+= efi-stub.o string.o intrinsics.o systable.o \
 lib-$(CONFIG_ARM)		+= arm32-stub.o
 lib-$(CONFIG_ARM64)		+= arm64.o arm64-stub.o smbios.o
 lib-$(CONFIG_X86)		+= x86-stub.o
+lib-$(CONFIG_X86_64)		+= x86-5lvl.o
 lib-$(CONFIG_RISCV)		+= riscv.o riscv-stub.o
 lib-$(CONFIG_LOONGARCH)		+= loongarch.o loongarch-stub.o
 
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 732984295295fb6d..bfa30625f5d03167 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -73,6 +73,8 @@ efi_status_t efi_parse_options(char const *cmdline)
 			efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
 		} else if (!strcmp(param, "noinitrd")) {
 			efi_noinitrd = true;
+		} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+			efi_no5lvl = true;
 		} else if (!strcmp(param, "efi") && val) {
 			efi_nochunk = parse_option_str(val, "nochunk");
 			efi_novamap |= parse_option_str(val, "novamap");
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 6aa38a1bf1265d83..06b7abc92ced9e18 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -33,6 +33,7 @@
 #define EFI_ALLOC_LIMIT		ULONG_MAX
 #endif
 
+extern bool efi_no5lvl;
 extern bool efi_nochunk;
 extern bool efi_nokaslr;
 extern int efi_loglevel;
diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c
new file mode 100644
index 0000000000000000..479dd445acdcff8d
--- /dev/null
+++ b/drivers/firmware/efi/libstub/x86-5lvl.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/efi.h>
+
+#include <asm/boot.h>
+#include <asm/desc.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+#include "x86-stub.h"
+
+bool efi_no5lvl;
+
+static void (*la57_toggle)(void *cr3);
+
+static const struct desc_struct gdt[] = {
+	[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_CS]   = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+};
+
+/*
+ * Enabling (or disabling) 5 level paging is tricky, because it can only be
+ * done from 32-bit mode with paging disabled. This means not only that the
+ * code itself must be running from 32-bit addressable physical memory, but
+ * also that the root page table must be 32-bit addressable, as programming
+ * a 64-bit value into CR3 when running in 32-bit mode is not supported.
+ */
+efi_status_t efi_setup_5level_paging(void)
+{
+	u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src;
+	efi_status_t status;
+	u8 *la57_code;
+
+	if (!efi_is_64bit())
+		return EFI_SUCCESS;
+
+	/* check for 5 level paging support */
+	if (native_cpuid_eax(0) < 7 ||
+	    !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+		return EFI_SUCCESS;
+
+	/* allocate some 32-bit addressable memory for code and a page table */
+	status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
+				    U32_MAX);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size);
+	memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);
+
+	/*
+	 * To avoid the need to allocate a 32-bit addressable stack, the
+	 * trampoline uses a LJMP instruction to switch back to long mode.
+	 * LJMP takes an absolute destination address, which needs to be
+	 * fixed up at runtime.
+	 */
+	*(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code;
+
+	efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE);
+
+	return EFI_SUCCESS;
+}
+
+void efi_5level_switch(void)
+{
+	bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+	bool have_la57 = native_read_cr4() & X86_CR4_LA57;
+	bool need_toggle = want_la57 ^ have_la57;
+	u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
+	u64 *cr3 = (u64 *)__native_read_cr3();
+	u64 *new_cr3;
+
+	if (!la57_toggle || !need_toggle)
+		return;
+
+	if (!have_la57) {
+		/*
+		 * 5 level paging will be enabled, so a root level page needs
+		 * to be allocated from the 32-bit addressable physical region,
+		 * with its first entry referring to the existing hierarchy.
+		 */
+		new_cr3 = memset(pgt, 0, PAGE_SIZE);
+		new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
+	} else {
+		/* take the new root table pointer from the current entry #0 */
+		new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
+
+		/* copy the new root table if it is not 32-bit addressable */
+		if ((u64)new_cr3 > U32_MAX)
+			new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
+	}
+
+	native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt });
+
+	la57_toggle(new_cr3);
+}
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 9247dbc7dbbd12ef..af5f50617a5b4c59 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -17,6 +17,7 @@
 #include <asm/boot.h>
 
 #include "efistub.h"
+#include "x86-stub.h"
 
 /* Maximum physical address for 64-bit kernel with 4-level paging */
 #define MAXMEM_X86_64_4LEVEL (1ull << 46)
@@ -223,8 +224,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
 	}
 }
 
-static void
-adjust_memory_range_protection(unsigned long start, unsigned long size)
+void efi_adjust_memory_range_protection(unsigned long start,
+					unsigned long size)
 {
 	efi_status_t status;
 	efi_gcd_memory_space_desc_t desc;
@@ -278,35 +279,14 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
 	}
 }
 
-/*
- * Trampoline takes 2 pages and can be loaded in first megabyte of memory
- * with its end placed between 128k and 640k where BIOS might start.
- * (see arch/x86/boot/compressed/pgtable_64.c)
- *
- * We cannot find exact trampoline placement since memory map
- * can be modified by UEFI, and it can alter the computed address.
- */
-
-#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
-#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
-
 extern const u8 startup_32[], startup_64[];
 
 static void
 setup_memory_protection(unsigned long image_base, unsigned long image_size)
 {
-	/*
-	 * Allow execution of possible trampoline used
-	 * for switching between 4- and 5-level page tables
-	 * and relocated kernel image.
-	 */
-
-	adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
-				       TRAMPOLINE_PLACEMENT_SIZE);
-
 #ifdef CONFIG_64BIT
 	if (image_base != (unsigned long)startup_32)
-		adjust_memory_range_protection(image_base, image_size);
+		efi_adjust_memory_range_protection(image_base, image_size);
 #else
 	/*
 	 * Clear protection flags on a whole range of possible
@@ -316,8 +296,8 @@ setup_memory_protection(unsigned long image_base, unsigned long image_size)
 	 * need to remove possible protection on relocated image
 	 * itself disregarding further relocations.
 	 */
-	adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
-				       KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
+	efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
+					   KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
 #endif
 }
 
@@ -839,6 +819,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 		efi_dxe_table = NULL;
 	}
 
+	status = efi_setup_5level_paging();
+	if (status != EFI_SUCCESS) {
+		efi_err("efi_setup_5level_paging() failed!\n");
+		goto fail;
+	}
+
 	/*
 	 * If the kernel isn't already loaded at a suitable address,
 	 * relocate it.
@@ -959,6 +945,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 		goto fail;
 	}
 
+	efi_5level_switch();
+
 	if (IS_ENABLED(CONFIG_X86_64))
 		bzimage_addr += startup_64 - startup_32;
 
diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h
new file mode 100644
index 0000000000000000..37c5a36b9d8cf9b2
--- /dev/null
+++ b/drivers/firmware/efi/libstub/x86-stub.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/efi.h>
+
+extern void trampoline_32bit_src(void *, bool);
+extern const u16 trampoline_ljmp_imm_offset;
+
+void efi_adjust_memory_range_protection(unsigned long start,
+					unsigned long size);
+
+#ifdef CONFIG_X86_64
+efi_status_t efi_setup_5level_paging(void);
+void efi_5level_switch(void);
+#else
+static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; }
+static inline void efi_5level_switch(void) {}
+#endif
-- 
2.39.2


  parent reply	other threads:[~2023-08-07 16:29 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-07 16:26 [PATCH v9 00/23] efi/x86: Avoid bare metal decompressor during EFI boot Ard Biesheuvel
2023-08-07 16:26 ` [PATCH v9 01/23] x86/decompressor: Don't rely on upper 32 bits of GPRs being preserved Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:26 ` [PATCH v9 02/23] x86/head_64: Store boot_params pointer in callee save register Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 03/23] x86/efistub: Branch straight to kernel entry point from C code Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 04/23] x86/efistub: Simplify and clean up handover entry code Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 05/23] x86/decompressor: Avoid magic offsets for EFI handover entrypoint Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 06/23] x86/efistub: Clear BSS in EFI handover protocol entrypoint Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 07/23] x86/decompressor: Store boot_params pointer in callee save register Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 08/23] x86/decompressor: Assign paging related global variables earlier Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 09/23] x86/decompressor: Call trampoline as a normal function Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 10/23] x86/decompressor: Use standard calling convention for trampoline Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 11/23] x86/decompressor: Avoid the need for a stack in the 32-bit trampoline Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 12/23] x86/decompressor: Call trampoline directly from C code Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 13/23] x86/decompressor: Only call the trampoline when changing paging levels Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 14/23] x86/decompressor: Pass pgtable address to trampoline directly Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 15/23] x86/decompressor: Merge trampoline cleanup with switching code Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` Ard Biesheuvel [this message]
2023-08-08  8:44   ` [tip: x86/boot] x86/efistub: Perform 4/5 level paging switch from the stub tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 17/23] x86/efistub: Prefer EFI memory attributes protocol over DXE services Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 18/23] decompress: Use 8 byte alignment Ard Biesheuvel
2023-08-08  8:44   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 19/23] x86/decompressor: Move global symbol references to C code Ard Biesheuvel
2023-08-08  8:43   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 20/23] x86/decompressor: Factor out kernel decompression and relocation Ard Biesheuvel
2023-08-08  8:43   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 21/23] efi/libstub: Add limit argument to efi_random_alloc() Ard Biesheuvel
2023-08-08  8:43   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 22/23] x86/efistub: Perform SNP feature test while running in the firmware Ard Biesheuvel
2023-08-08  8:43   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2023-08-07 16:27 ` [PATCH v9 23/23] x86/efistub: Avoid legacy decompressor when doing EFI boot Ard Biesheuvel
2023-08-08  8:43   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230807162720.545787-17-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.