linux-kernel.vger.kernel.org archive mirror
From: Ard Biesheuvel <ardb+git@google.com>
To: linux-kernel@vger.kernel.org
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Kevin Loughlin <kevinloughlin@google.com>,
	 Tom Lendacky <thomas.lendacky@amd.com>,
	Dionna Glaze <dionnaglaze@google.com>,
	 Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	 Dave Hansen <dave.hansen@linux.intel.com>,
	Andy Lutomirski <luto@kernel.org>,  Arnd Bergmann <arnd@arndb.de>,
	Nathan Chancellor <nathan@kernel.org>,
	 Nick Desaulniers <ndesaulniers@google.com>,
	Justin Stitt <justinstitt@google.com>,
	 Kees Cook <keescook@chromium.org>,
	Brian Gerst <brgerst@gmail.com>,
	linux-arch@vger.kernel.org,  llvm@lists.linux.dev
Subject: [PATCH v3 08/19] x86/head64: Replace pointer fixups with PIE codegen
Date: Mon, 29 Jan 2024 19:05:11 +0100
Message-ID: <20240129180502.4069817-29-ardb+git@google.com>
In-Reply-To: <20240129180502.4069817-21-ardb+git@google.com>

From: Ard Biesheuvel <ardb@kernel.org>

Some of the C code in head64.c may be called from a different virtual
address than it was linked at. Currently, we deal with this by using
ordinary, position-dependent codegen and fixing up all symbol
references on the fly. This is fragile and tricky to maintain. It is
also unnecessary: we can use position-independent codegen (with hidden
visibility) to ensure that all compiler-generated symbol references are
RIP-relative, removing the need for fixups entirely.

It does mean that explicit references to kernel virtual addresses need
to be generated by hand, so generate those using a movabs instruction
in inline asm in the handful of places where we actually need them.
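
For illustration only (this is a sketch, not part of the diff below,
which adds the actual macro to head64.c), the idea is to force an
absolute, link-time symbol reference through inline asm, so that the
resulting value is the kernel virtual address even while the code runs
from the 1:1 mapping. __stringify() is assumed to come from
<linux/stringify.h>, and the usage line is a hypothetical example:

	/*
	 * Emit a movabs-style absolute reference to sym's link-time
	 * address, rather than the RIP-relative reference that PIE
	 * codegen would otherwise produce.
	 */
	#define __va_symbol(sym) ({					\
		unsigned long __v;					\
		asm("movq $" __stringify(sym) ", %0" : "=r"(__v));	\
		__v; })

	/* hypothetical usage: the linked VA of _text, not the address
	   this code currently executes from */
	unsigned long text_va = __va_symbol(_text);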

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/Makefile                 |  8 ++
 arch/x86/boot/compressed/Makefile |  2 +-
 arch/x86/include/asm/desc.h       |  3 +-
 arch/x86/include/asm/setup.h      |  4 +-
 arch/x86/kernel/Makefile          |  5 ++
 arch/x86/kernel/head64.c          | 88 +++++++-------------
 arch/x86/kernel/head_64.S         |  5 +-
 7 files changed, 51 insertions(+), 64 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1a068de12a56..2b5954e75318 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -168,6 +168,14 @@ else
         KBUILD_CFLAGS += -mcmodel=kernel
         KBUILD_RUSTFLAGS += -Cno-redzone=y
         KBUILD_RUSTFLAGS += -Ccode-model=kernel
+
+	PIE_CFLAGS-$(CONFIG_STACKPROTECTOR)	+= -fno-stack-protector
+	PIE_CFLAGS-$(CONFIG_LTO)		+= -fno-lto
+
+	PIE_CFLAGS := -fpie -mcmodel=small $(PIE_CFLAGS-y) \
+		      -include $(srctree)/include/linux/hidden.h
+
+	export PIE_CFLAGS
 endif
 
 #
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f19c038409aa..bccee07eae60 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T
 hostprogs	:= mkpiggy
 HOST_EXTRACFLAGS += -I$(srctree)/tools/include
 
-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
 
 quiet_cmd_voffset = VOFFSET $@
       cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ab97b22ac04a..2e9809feeacd 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -134,7 +134,8 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
 
 #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
 
-static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
+static __always_inline void
+native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
 {
 	memcpy(&idt[entry], gate, sizeof(*gate));
 }
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 5c83729c8e71..b004f1b9a052 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -47,8 +47,8 @@ extern unsigned long saved_video_mode;
 
 extern void reserve_standard_io_resources(void);
 extern void i386_reserve_resources(void);
-extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
-extern void startup_64_setup_env(unsigned long physbase);
+extern unsigned long __startup_64(struct boot_params *bp);
+extern void startup_64_setup_env(void);
 extern void early_setup_idt(void);
 extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0000325ab98f..42db41b04d8e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -21,6 +21,11 @@ CFLAGS_REMOVE_sev.o = -pg
 CFLAGS_REMOVE_rethook.o = -pg
 endif
 
+# head64.c contains C code that may execute from a different virtual address
+# than it was linked at, so we always build it using PIE codegen
+CFLAGS_head64.o += $(PIE_CFLAGS)
+UBSAN_SANITIZE_head64.o					:= n
+
 KASAN_SANITIZE_head$(BITS).o				:= n
 KASAN_SANITIZE_dumpstack.o				:= n
 KASAN_SANITIZE_dumpstack_$(BITS).o			:= n
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d636bb02213f..a4a380494703 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -74,15 +74,10 @@ static struct desc_ptr startup_gdt_descr __initdata = {
 	.address = 0,
 };
 
-static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
-{
-	return ptr - (void *)_text + (void *)physaddr;
-}
-
-static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
-{
-	return fixup_pointer(ptr, physaddr);
-}
+#define __va_symbol(sym) ({						\
+	unsigned long __v;						\
+	asm("movq $" __stringify(sym) ", %0":"=r"(__v));		\
+	__v; })
 
 static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
 {
@@ -99,8 +94,8 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv
 	 * attribute.
 	 */
 	if (sme_get_me_mask()) {
-		vaddr = (unsigned long)__start_bss_decrypted;
-		vaddr_end = (unsigned long)__end_bss_decrypted;
+		vaddr = __va_symbol(__start_bss_decrypted);
+		vaddr_end = __va_symbol(__end_bss_decrypted);
 
 		for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
 			/*
@@ -127,25 +122,17 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv
 	return sme_get_me_mask();
 }
 
-/* Code in __startup_64() can be relocated during execution, but the compiler
- * doesn't have to generate PC-relative relocations when accessing globals from
- * that function. Clang actually does not generate them, which leads to
- * boot-time crashes. To work around this problem, every global pointer must
- * be adjusted using fixup_pointer().
- */
-unsigned long __head __startup_64(unsigned long physaddr,
-				  struct boot_params *bp)
+unsigned long __head __startup_64(struct boot_params *bp)
 {
+	unsigned long physaddr = (unsigned long)_text;
 	unsigned long load_delta, *p;
 	unsigned long pgtable_flags;
 	pgdval_t *pgd;
 	p4dval_t *p4d;
 	pudval_t *pud;
 	pmdval_t *pmd, pmd_entry;
-	pteval_t *mask_ptr;
 	bool la57;
 	int i;
-	unsigned int *next_pgt_ptr;
 
 	la57 = pgtable_l5_enabled();
 
@@ -157,7 +144,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	 * Compute the delta between the address I am compiled to run at
 	 * and the address I am actually running at.
 	 */
-	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
+	load_delta = physaddr - (__va_symbol(_text) - __START_KERNEL_map);
 
 	/* Is the address not 2M aligned? */
 	if (load_delta & ~PMD_MASK)
@@ -168,26 +155,24 @@ unsigned long __head __startup_64(unsigned long physaddr,
 
 	/* Fixup the physical addresses in the page table */
 
-	pgd = fixup_pointer(early_top_pgt, physaddr);
+	pgd = (pgdval_t *)early_top_pgt;
 	p = pgd + pgd_index(__START_KERNEL_map);
 	if (la57)
 		*p = (unsigned long)level4_kernel_pgt;
 	else
 		*p = (unsigned long)level3_kernel_pgt;
-	*p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;
+	*p += _PAGE_TABLE_NOENC + sme_get_me_mask();
 
 	if (la57) {
-		p4d = fixup_pointer(level4_kernel_pgt, physaddr);
+		p4d = (p4dval_t *)level4_kernel_pgt;
 		p4d[511] += load_delta;
 	}
 
-	pud = fixup_pointer(level3_kernel_pgt, physaddr);
-	pud[510] += load_delta;
-	pud[511] += load_delta;
+	level3_kernel_pgt[510].pud += load_delta;
+	level3_kernel_pgt[511].pud += load_delta;
 
-	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
 	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
-		pmd[i] += load_delta;
+		level2_fixmap_pgt[i].pmd += load_delta;
 
 	/*
 	 * Set up the identity mapping for the switchover.  These
@@ -196,15 +181,13 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	 * it avoids problems around wraparound.
 	 */
 
-	next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
-	pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
-	pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
+	pud = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
+	pmd = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
 
 	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
 
 	if (la57) {
-		p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
-				    physaddr);
+		p4d = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
 
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
 		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
@@ -225,8 +208,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
 
 	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
 	/* Filter out unsupported __PAGE_KERNEL_* bits: */
-	mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
-	pmd_entry &= *mask_ptr;
+	pmd_entry &= __supported_pte_mask;
 	pmd_entry += sme_get_me_mask();
 	pmd_entry +=  physaddr;
 
@@ -252,14 +234,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	 * error, causing the BIOS to halt the system.
 	 */
 
-	pmd = fixup_pointer(level2_kernel_pgt, physaddr);
+	pmd = (pmdval_t *)level2_kernel_pgt;
 
 	/* invalidate pages before the kernel image */
-	for (i = 0; i < pmd_index((unsigned long)_text); i++)
+	for (i = 0; i < pmd_index(__va_symbol(_text)); i++)
 		pmd[i] &= ~_PAGE_PRESENT;
 
 	/* fixup pages that are part of the kernel image */
-	for (; i <= pmd_index((unsigned long)_end); i++)
+	for (; i <= pmd_index(__va_symbol(_end)); i++)
 		if (pmd[i] & _PAGE_PRESENT)
 			pmd[i] += load_delta;
 
@@ -271,7 +253,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	 * Fixup phys_base - remove the memory encryption mask to obtain
 	 * the true physical address.
 	 */
-	*fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();
+	phys_base += load_delta - sme_get_me_mask();
 
 	return sme_postprocess_startup(bp, pmd);
 }
@@ -553,22 +535,16 @@ static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler)
 }
 
 /* This runs while still in the direct mapping */
-static void __head startup_64_load_idt(unsigned long physbase)
+static void __head startup_64_load_idt(void)
 {
-	struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase);
-	gate_desc *idt = fixup_pointer(bringup_idt_table, physbase);
-
-
-	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
-		void *handler;
+	gate_desc *idt = bringup_idt_table;
 
+	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
 		/* VMM Communication Exception */
-		handler = fixup_pointer(vc_no_ghcb, physbase);
-		set_bringup_idt_handler(idt, X86_TRAP_VC, handler);
-	}
+		set_bringup_idt_handler(idt, X86_TRAP_VC, vc_no_ghcb);
 
-	desc->address = (unsigned long)idt;
-	native_load_idt(desc);
+	bringup_idt_descr.address = (unsigned long)idt;
+	native_load_idt(&bringup_idt_descr);
 }
 
 /* This is used when running on kernel addresses */
@@ -587,10 +563,10 @@ void early_setup_idt(void)
 /*
  * Setup boot CPU state needed before kernel switches to virtual addresses.
  */
-void __head startup_64_setup_env(unsigned long physbase)
+void __head startup_64_setup_env(void)
 {
 	/* Load GDT */
-	startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase);
+	startup_gdt_descr.address = (unsigned long)startup_gdt;
 	native_load_gdt(&startup_gdt_descr);
 
 	/* New GDT is live - reload data segment registers */
@@ -598,5 +574,5 @@ void __head startup_64_setup_env(unsigned long physbase)
 		     "movl %%eax, %%ss\n"
 		     "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
 
-	startup_64_load_idt(physbase);
+	startup_64_load_idt();
 }
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 953b82be4cd4..b0508e84f756 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -67,8 +67,6 @@ SYM_CODE_START_NOALIGN(startup_64)
 	/* Set up the stack for verify_cpu() */
 	leaq	(__end_init_task - PTREGS_SIZE)(%rip), %rsp
 
-	leaq	_text(%rip), %rdi
-
 	/* Setup GSBASE to allow stack canary access for C code */
 	movl	$MSR_GS_BASE, %ecx
 	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
@@ -107,8 +105,7 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * is active) to be added to the initial pgdir entry that will be
 	 * programmed into CR3.
 	 */
-	leaq	_text(%rip), %rdi
-	movq	%r15, %rsi
+	movq	%r15, %rdi
 	call	__startup_64
 
 	/* Form the CR3 value being sure to include the CR3 modifier */
-- 
2.43.0.429.g432eaa2c6b-goog



Thread overview: 52+ messages
2024-01-29 18:05 [PATCH v3 00/19] x86: Confine early 1:1 mapped startup code Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 01/19] efi/libstub: Add generic support for parsing mem_encrypt= Ard Biesheuvel
2024-01-31  7:31   ` Borislav Petkov
2024-02-01 16:23     ` Kevin Loughlin
2024-02-01 16:28       ` Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 02/19] x86/boot: Move mem_encrypt= parsing to the decompressor Ard Biesheuvel
2024-01-31  8:35   ` Borislav Petkov
2024-01-31  9:12     ` Ard Biesheuvel
2024-01-31  9:29       ` Borislav Petkov
2024-01-31  9:59         ` Ard Biesheuvel
2024-02-01 14:17         ` Tom Lendacky
2024-02-01 16:15           ` Ard Biesheuvel
2024-02-02 16:35             ` [PATCH] x86/Kconfig: Remove CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT Borislav Petkov
2024-02-02 16:47               ` Ard Biesheuvel
2024-02-03 10:50               ` [tip: x86/sev] " tip-bot2 for Borislav Petkov (AMD)
2024-01-29 18:05 ` [PATCH v3 03/19] x86/startup_64: Drop long return to initial_code pointer Ard Biesheuvel
2024-01-31 13:44   ` Borislav Petkov
2024-01-31 13:57     ` Ard Biesheuvel
2024-01-31 14:07       ` Ard Biesheuvel
2024-01-31 16:29         ` Borislav Petkov
2024-01-31 18:14   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 04/19] x86/startup_64: Simplify calculation of initial page table address Ard Biesheuvel
2024-02-05 10:40   ` Borislav Petkov
2024-01-29 18:05 ` [PATCH v3 05/19] x86/startup_64: Simplify CR4 handling in startup code Ard Biesheuvel
2024-02-06 18:21   ` Borislav Petkov
2024-02-07 10:38     ` Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 06/19] x86/startup_64: Drop global variables keeping track of LA57 state Ard Biesheuvel
2024-02-07 13:29   ` Borislav Petkov
2024-02-09 13:55     ` Ard Biesheuvel
2024-02-10 10:40       ` Borislav Petkov
2024-02-11 22:36         ` Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 07/19] x86/startup_64: Simplify virtual switch on primary boot Ard Biesheuvel
2024-02-07 14:50   ` Borislav Petkov
2024-01-29 18:05 ` Ard Biesheuvel [this message]
2024-02-12 10:29   ` [PATCH v3 08/19] x86/head64: Replace pointer fixups with PIE codegen Borislav Petkov
2024-02-12 11:52     ` Ard Biesheuvel
2024-02-12 14:18       ` Borislav Petkov
2024-01-29 18:05 ` [PATCH v3 09/19] x86/head64: Simplify GDT/IDT initialization code Ard Biesheuvel
2024-02-12 14:37   ` Borislav Petkov
2024-02-12 15:23     ` Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 10/19] asm-generic: Add special .pi.text section for position independent code Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 11/19] x86: Move return_thunk to __pitext section Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 12/19] x86/head64: Move early startup code into __pitext Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 13/19] modpost: Warn about calls from __pitext into other text sections Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 14/19] x86/coco: Make cc_set_mask() static inline Ard Biesheuvel
2024-01-30 23:16   ` Kevin Loughlin
2024-01-30 23:36     ` Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 15/19] x86/sev: Make all code reachable from 1:1 mapping __pitext Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 16/19] x86/sev: Avoid WARN() in early code Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 17/19] x86/sev: Use PIC codegen for early SEV startup code Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 18/19] x86/sev: Drop inline asm LEA instructions for RIP-relative references Ard Biesheuvel
2024-01-29 18:05 ` [PATCH v3 19/19] x86/startup_64: Don't bother setting up GS before the kernel is mapped Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240129180502.4069817-29-ardb+git@google.com \
    --to=ardb+git@google.com \
    --cc=ardb@kernel.org \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=brgerst@gmail.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dionnaglaze@google.com \
    --cc=justinstitt@google.com \
    --cc=keescook@chromium.org \
    --cc=kevinloughlin@google.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=llvm@lists.linux.dev \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=nathan@kernel.org \
    --cc=ndesaulniers@google.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.