linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCHv2 for v4.17 0/2] Fix two crashes in the decompression code
@ 2018-05-16  8:01 Kirill A. Shutemov
  2018-05-16  8:01 ` [PATCHv2 1/2] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline() Kirill A. Shutemov
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2018-05-16  8:01 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Hugh Dickins, linux-kernel, Kirill A. Shutemov

Here's an updated version of two crash fixes in early boot code.

Kirill A. Shutemov (2):
  x86/boot/compressed/64: Set up GOT for paging_prepare() and
    cleanup_trampoline()
  x86/boot/compressed/64: Fix moving page table out of trampoline memory

 arch/x86/boot/compressed/head_64.S    | 79 ++++++++++++++++++++++-----
 arch/x86/boot/compressed/pgtable_64.c | 14 +----
 2 files changed, 69 insertions(+), 24 deletions(-)

-- 
2.17.0

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCHv2 1/2] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline()
  2018-05-16  8:01 [PATCHv2 for v4.17 0/2] Fix two crashes in the decompression code Kirill A. Shutemov
@ 2018-05-16  8:01 ` Kirill A. Shutemov
  2018-05-16  9:52   ` Thomas Gleixner
  2018-05-16 10:35   ` [tip:x86/urgent] " tip-bot for Kirill A. Shutemov
  2018-05-16  8:01 ` [PATCHv2 2/2] x86/boot/compressed/64: Fix moving page table out of trampoline memory Kirill A. Shutemov
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2018-05-16  8:01 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Hugh Dickins, linux-kernel, Kirill A. Shutemov

Eric and Hugh have reported instant reboot due to my recent changes in
decompression code.

The root cause is that I didn't realize that we need to adjust GOT to be
able to run C code that early.

The problem is only visible with an older toolchain. Binutils >= 2.24 is
able to eliminate GOT references by replacing them with RIP-relative
address loads[1].

We need to adjust GOT two times:
 - before calling paging_prepare() using the initial load address
 - before calling C code from the relocated kernel

[1] https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commitdiff;h=80d873266dec

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Fixes: 194a9749c73d ("x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: Hugh Dickins <hughd@google.com>
---
 arch/x86/boot/compressed/head_64.S | 68 ++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 13 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fca012baba19..d17af6a4bfc9 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,6 +305,25 @@ ENTRY(startup_64)
 	/* Set up the stack */
 	leaq	boot_stack_end(%rbx), %rsp
 
+	/*
+	 * paging_prepare() and cleanup_trampoline() below can have GOT
+	 * references. Adjust the table with address we are running at.
+	 *
+	 * Zero RAX for adjust_got: the GOT was not adjusted before;
+	 * there's no adjustment to undo.
+	 */
+	xorq	%rax, %rax
+
+	/*
+	 * Calculate the address the binary is loaded at and use it as
+	 * a GOT adjustment.
+	 */
+	call	1f
+1:	popq	%rdi
+	subq	$1b, %rdi
+
+	call	adjust_got
+
 	/*
 	 * At this point we are in long mode with 4-level paging enabled,
 	 * but we might want to enable 5-level paging or vice versa.
@@ -381,6 +400,21 @@ trampoline_return:
 	pushq	$0
 	popfq
 
+	/*
+	 * Previously we've adjusted the GOT with address the binary was
+	 * loaded at. Now we need to re-adjust for relocation address.
+	 *
+	 * Calculate the address the binary is loaded at, so that we can
+	 * undo the previous GOT adjustment.
+	 */
+	call	1f
+1:	popq	%rax
+	subq	$1b, %rax
+
+	/* The new adjustment is the relocation address */
+	movq	%rbx, %rdi
+	call	adjust_got
+
 /*
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
@@ -481,19 +515,6 @@ relocated:
 	shrq	$3, %rcx
 	rep	stosq
 
-/*
- * Adjust our own GOT
- */
-	leaq	_got(%rip), %rdx
-	leaq	_egot(%rip), %rcx
-1:
-	cmpq	%rcx, %rdx
-	jae	2f
-	addq	%rbx, (%rdx)
-	addq	$8, %rdx
-	jmp	1b
-2:
-	
 /*
  * Do the extraction, and jump to the new kernel..
  */
@@ -512,6 +533,27 @@ relocated:
  */
 	jmp	*%rax
 
+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+	/* Walk through the GOT adding the address to the entries */
+	leaq	_got(%rip), %rdx
+	leaq	_egot(%rip), %rcx
+1:
+	cmpq	%rcx, %rdx
+	jae	2f
+	subq	%rax, (%rdx)	/* Undo previous adjustment */
+	addq	%rdi, (%rdx)	/* Apply the new adjustment */
+	addq	$8, %rdx
+	jmp	1b
+2:
+	ret
+
 	.code32
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCHv2 2/2] x86/boot/compressed/64: Fix moving page table out of trampoline memory
  2018-05-16  8:01 [PATCHv2 for v4.17 0/2] Fix two crashes in the decompression code Kirill A. Shutemov
  2018-05-16  8:01 ` [PATCHv2 1/2] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline() Kirill A. Shutemov
@ 2018-05-16  8:01 ` Kirill A. Shutemov
  2018-05-16  9:53   ` Thomas Gleixner
  2018-05-16 10:36   ` [tip:x86/urgent] " tip-bot for Kirill A. Shutemov
  2018-05-16  8:01 ` [PATCH 3/4] x86/boot/compressed/64: Fix trampoline page table address calculation Kirill A. Shutemov
  2018-05-16  8:01 ` [PATCH 4/4] x86/mm: Introduce 'no5lvl' kernel parameter Kirill A. Shutemov
  3 siblings, 2 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2018-05-16  8:01 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Hugh Dickins, linux-kernel, Kirill A. Shutemov

cleanup_trampoline() relocates the top-level page table out of
trampoline memory. We use 'top_pgtable' as our new top-level page table.

But if the 'top_pgtable' would be referenced from C in a usual way,
the address of the table will be calculated relative to RIP.
After kernel gets relocated, the address will be in the middle of
decompression buffer and the page table may get overwritten.
This leads to a crash.

We calculate the address of other page tables relative to the relocation
address. It makes them safe. We should do the same for 'top_pgtable'.

Calculate the address of 'top_pgtable' in assembly and pass down to
cleanup_trampoline().

Move the page table to .pgtable section where the rest of page tables
are. The section is @nobits so we save 4k in kernel image.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Fixes: e9d0e6330eb8 ("x86/boot/compressed/64: Prepare new top-level page table for trampoline")
---
 arch/x86/boot/compressed/head_64.S    | 11 +++++++++++
 arch/x86/boot/compressed/pgtable_64.c | 14 +++-----------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d17af6a4bfc9..8169e8b7a4dc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -389,10 +389,14 @@ trampoline_return:
 	/*
 	 * cleanup_trampoline() would restore trampoline memory.
 	 *
+	 * RDI is address of the page table to use instead of page table
+	 * in trampoline memory (if required).
+	 *
 	 * RSI holds real mode data and needs to be preserved across
 	 * this function call.
 	 */
 	pushq	%rsi
+	leaq	top_pgtable(%rbx), %rdi
 	call	cleanup_trampoline
 	popq	%rsi
 
@@ -691,3 +695,10 @@ boot_stack_end:
 	.balign 4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+	.fill PAGE_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 32af1cbcd903..a362fa0b849c 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -22,14 +22,6 @@ struct paging_config {
 /* Buffer to preserve trampoline memory */
 static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
 
-/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- *
- * It must not be in BSS as BSS is cleared after cleanup_trampoline().
- */
-static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
-
 /*
  * Trampoline address will be printed by extract_kernel() for debugging
  * purposes.
@@ -134,7 +126,7 @@ struct paging_config paging_prepare(void)
 	return paging_config;
 }
 
-void cleanup_trampoline(void)
+void cleanup_trampoline(void *pgtable)
 {
 	void *trampoline_pgtable;
 
@@ -145,8 +137,8 @@ void cleanup_trampoline(void)
 	 * if it's there.
 	 */
 	if ((void *)__native_read_cr3() == trampoline_pgtable) {
-		memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
-		native_write_cr3((unsigned long)top_pgtable);
+		memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+		native_write_cr3((unsigned long)pgtable);
 	}
 
 	/* Restore trampoline memory */
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/4] x86/boot/compressed/64: Fix trampoline page table address calculation
  2018-05-16  8:01 [PATCHv2 for v4.17 0/2] Fix two crashes in the decompression code Kirill A. Shutemov
  2018-05-16  8:01 ` [PATCHv2 1/2] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline() Kirill A. Shutemov
  2018-05-16  8:01 ` [PATCHv2 2/2] x86/boot/compressed/64: Fix moving page table out of trampoline memory Kirill A. Shutemov
@ 2018-05-16  8:01 ` Kirill A. Shutemov
  2018-05-16  8:06   ` Kirill A. Shutemov
  2018-05-16  8:01 ` [PATCH 4/4] x86/mm: Introduce 'no5lvl' kernel parameter Kirill A. Shutemov
  3 siblings, 1 reply; 10+ messages in thread
From: Kirill A. Shutemov @ 2018-05-16  8:01 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Hugh Dickins, linux-kernel, Kirill A. Shutemov

Hugh noticed that I calculate address of trampoline page table wrong in
cleanup_trampoline(). TRAMPOLINE_32BIT_PGTABLE_OFFSET has to be divided
by sizeof(unsigned long) since trampoline_32bit is unsigned long
pointer.

TRAMPOLINE_32BIT_PGTABLE_OFFSET is zero so the bug doesn't have a
visible effect.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Hugh Dickins <hughd@google.com>
Fixes: e9d0e6330eb8 ("x86/boot/compressed/64: Prepare new top-level page table for trampoline")
---
 arch/x86/boot/compressed/pgtable_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index a362fa0b849c..23707e1da1ff 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -130,7 +130,7 @@ void cleanup_trampoline(void *pgtable)
 {
 	void *trampoline_pgtable;
 
-	trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
+	trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
 
 	/*
 	 * Move the top level page table out of trampoline memory,
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/4] x86/mm: Introduce 'no5lvl' kernel parameter
  2018-05-16  8:01 [PATCHv2 for v4.17 0/2] Fix two crashes in the decompression code Kirill A. Shutemov
                   ` (2 preceding siblings ...)
  2018-05-16  8:01 ` [PATCH 3/4] x86/boot/compressed/64: Fix trampoline page table address calculation Kirill A. Shutemov
@ 2018-05-16  8:01 ` Kirill A. Shutemov
  3 siblings, 0 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2018-05-16  8:01 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Hugh Dickins, linux-kernel, Kirill A. Shutemov

The kernel parameter allows to force kernel to use 4-level paging even
if hardware and kernel support 5-level paging.

The option may be useful to workaround regressions related to 5-level
paging.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  3 +++
 arch/x86/boot/compressed/cmdline.c              |  2 +-
 arch/x86/boot/compressed/head_64.S              |  1 +
 arch/x86/boot/compressed/pgtable_64.c           | 12 ++++++++++--
 arch/x86/kernel/cpu/common.c                    |  6 ++++++
 arch/x86/kernel/head64.c                        | 10 ++++++----
 6 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 11fc28ecdb6d..364a33c1534d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2600,6 +2600,9 @@
 			emulation library even if a 387 maths coprocessor
 			is present.
 
+	no5lvl		[X86-64] Disable 5-level paging mode. Forces
+			kernel to use 4-level paging instead.
+
 	no_console_suspend
 			[HW] Never suspend the console
 			Disable suspending of consoles during suspend and
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 0cb325734cfb..af6cda0b7900 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "misc.h"
 
-#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL
 
 static unsigned long fs;
 static inline void set_fs(unsigned long seg)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1babefd9ddfc..763793349cdd 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -365,6 +365,7 @@ ENTRY(startup_64)
 	 * this function call.
 	 */
 	pushq	%rsi
+	movq	%rsi, %rdi		/* real mode address */
 	call	paging_prepare
 	popq	%rsi
 
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 23707e1da1ff..8c5107545251 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -31,16 +31,23 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
  */
 unsigned long *trampoline_32bit __section(.data);
 
-struct paging_config paging_prepare(void)
+extern struct boot_params *boot_params;
+int cmdline_find_option_bool(const char *option);
+
+struct paging_config paging_prepare(void *rmode)
 {
 	struct paging_config paging_config = {};
 	unsigned long bios_start, ebda_start;
 
+	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
+	boot_params = rmode;
+
 	/*
 	 * Check if LA57 is desired and supported.
 	 *
-	 * There are two parts to the check:
+	 * There are several parts to the check:
 	 *   - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
+	 *   - if user asked to disable 5-level paging: no5lvl in cmdline
 	 *   - if the machine supports 5-level paging:
 	 *     + CPUID leaf 7 is supported
 	 *     + the leaf has the feature bit set
@@ -48,6 +55,7 @@ struct paging_config paging_prepare(void)
 	 * That's substitute for boot_cpu_has() in early boot code.
 	 */
 	if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
+			!cmdline_find_option_bool("no5lvl") &&
 			native_cpuid_eax(0) >= 7 &&
 			(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
 		paging_config.l5_required = 1;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ce243f7d2d4e..1e91ec6293de 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1008,6 +1008,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	 */
 	setup_clear_cpu_cap(X86_FEATURE_PCID);
 #endif
+
+#ifdef CONFIG_X86_5LEVEL
+	/* Clear the 5-level paging feature if user asked for 'no5lvl' */
+	if (!__pgtable_l5_enabled)
+		setup_clear_cpu_cap(X86_FEATURE_LA57);
+#endif
 }
 
 void __init early_cpu_init(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0c408f8c4ed4..8ca65d35b93a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -82,10 +82,12 @@ static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
 
 static bool __head check_la57_support(unsigned long physaddr)
 {
-	if (native_cpuid_eax(0) < 7)
-		return false;
-
-	if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+	/*
+	 * 5-level paging is detected and enabled on kernel decompression
+	 * stage. Back off if 5-level paging mode has not yet enabled by
+	 * this point.
+	 */
+	if (!(native_read_cr4() & X86_CR4_LA57))
 		return false;
 
 	*fixup_int(&pgtable_l5_enabled, physaddr) = 1;
-- 
2.17.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/4] x86/boot/compressed/64: Fix trampoline page table address calculation
  2018-05-16  8:01 ` [PATCH 3/4] x86/boot/compressed/64: Fix trampoline page table address calculation Kirill A. Shutemov
@ 2018-05-16  8:06   ` Kirill A. Shutemov
  0 siblings, 0 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2018-05-16  8:06 UTC (permalink / raw)
  To: Ingo Molnar, x86, Thomas Gleixner, H. Peter Anvin
  Cc: Hugh Dickins, linux-kernel

Ouch. Please ignore this and the next patch. It was sent by mistake.
Sorry.

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCHv2 1/2] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline()
  2018-05-16  8:01 ` [PATCHv2 1/2] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline() Kirill A. Shutemov
@ 2018-05-16  9:52   ` Thomas Gleixner
  2018-05-16 10:35   ` [tip:x86/urgent] " tip-bot for Kirill A. Shutemov
  1 sibling, 0 replies; 10+ messages in thread
From: Thomas Gleixner @ 2018-05-16  9:52 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Ingo Molnar, x86, H. Peter Anvin, Hugh Dickins, linux-kernel

On Wed, 16 May 2018, Kirill A. Shutemov wrote:

> Eric and Hugh have reported instant reboot due to my recent changes in
> decompression code.
> 
> The root cause is that I didn't realize that we need to adjust GOT to be
> able to run C code that early.
> 
> The problem is only visible with an older toolchain. Binutils >= 2.24 is
> able to eliminate GOT references by replacing them with RIP-relative
> address loads[1].
> 
> We need to adjust GOT two times:
>  - before calling paging_prepare() using the initial load address
>  - before calling C code from the relocated kernel
> 
> [1] https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commitdiff;h=80d873266dec
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Fixes: 194a9749c73d ("x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G")
> Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
> Reported-by: Hugh Dickins <hughd@google.com>

Very nice and well done!

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCHv2 2/2] x86/boot/compressed/64: Fix moving page table out of trampoline memory
  2018-05-16  8:01 ` [PATCHv2 2/2] x86/boot/compressed/64: Fix moving page table out of trampoline memory Kirill A. Shutemov
@ 2018-05-16  9:53   ` Thomas Gleixner
  2018-05-16 10:36   ` [tip:x86/urgent] " tip-bot for Kirill A. Shutemov
  1 sibling, 0 replies; 10+ messages in thread
From: Thomas Gleixner @ 2018-05-16  9:53 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Ingo Molnar, x86, H. Peter Anvin, Hugh Dickins, linux-kernel

On Wed, 16 May 2018, Kirill A. Shutemov wrote:

> cleanup_trampoline() relocates the top-level page table out of
> trampoline memory. We use 'top_pgtable' as our new top-level page table.
> 
> But if the 'top_pgtable' would be referenced from C in a usual way,
> the address of the table will be calculated relative to RIP.
> After kernel gets relocated, the address will be in the middle of
> decompression buffer and the page table may get overwritten.
> This leads to a crash.
> 
> We calculate the address of other page tables relative to the relocation
> address. It makes them safe. We should do the same for 'top_pgtable'.
> 
> Calculate the address of 'top_pgtable' in assembly and pass down to
> cleanup_trampoline().
> 
> Move the page table to .pgtable section where the rest of page tables
> are. The section is @nobits so we save 4k in kernel image.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Fixes: e9d0e6330eb8 ("x86/boot/compressed/64: Prepare new top-level page table for trampoline")

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [tip:x86/urgent] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline()
  2018-05-16  8:01 ` [PATCHv2 1/2] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline() Kirill A. Shutemov
  2018-05-16  9:52   ` Thomas Gleixner
@ 2018-05-16 10:35   ` tip-bot for Kirill A. Shutemov
  1 sibling, 0 replies; 10+ messages in thread
From: tip-bot for Kirill A. Shutemov @ 2018-05-16 10:35 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hughd, eric.dumazet, hpa, mingo, tglx, torvalds, peterz,
	kirill.shutemov, linux-kernel

Commit-ID:  5c9b0b1c49881c680d4a56b9d9e03dfb3160fd4d
Gitweb:     https://git.kernel.org/tip/5c9b0b1c49881c680d4a56b9d9e03dfb3160fd4d
Author:     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
AuthorDate: Wed, 16 May 2018 11:01:28 +0300
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 16 May 2018 12:15:13 +0200

x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline()

Eric and Hugh have reported instant reboot due to my recent changes in
decompression code.

The root cause is that I didn't realize that we need to adjust GOT to be
able to run C code that early.

The problem is only visible with an older toolchain. Binutils >= 2.24 is
able to eliminate GOT references by replacing them with RIP-relative
address loads:

  https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commitdiff;h=80d873266dec

We need to adjust GOT two times:

 - before calling paging_prepare() using the initial load address
 - before calling C code from the relocated kernel

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: 194a9749c73d ("x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G")
Link: http://lkml.kernel.org/r/20180516080131.27913-2-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/compressed/head_64.S | 68 ++++++++++++++++++++++++++++++--------
 1 file changed, 55 insertions(+), 13 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fca012baba19..d17af6a4bfc9 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,6 +305,25 @@ ENTRY(startup_64)
 	/* Set up the stack */
 	leaq	boot_stack_end(%rbx), %rsp
 
+	/*
+	 * paging_prepare() and cleanup_trampoline() below can have GOT
+	 * references. Adjust the table with address we are running at.
+	 *
+	 * Zero RAX for adjust_got: the GOT was not adjusted before;
+	 * there's no adjustment to undo.
+	 */
+	xorq	%rax, %rax
+
+	/*
+	 * Calculate the address the binary is loaded at and use it as
+	 * a GOT adjustment.
+	 */
+	call	1f
+1:	popq	%rdi
+	subq	$1b, %rdi
+
+	call	adjust_got
+
 	/*
 	 * At this point we are in long mode with 4-level paging enabled,
 	 * but we might want to enable 5-level paging or vice versa.
@@ -381,6 +400,21 @@ trampoline_return:
 	pushq	$0
 	popfq
 
+	/*
+	 * Previously we've adjusted the GOT with address the binary was
+	 * loaded at. Now we need to re-adjust for relocation address.
+	 *
+	 * Calculate the address the binary is loaded at, so that we can
+	 * undo the previous GOT adjustment.
+	 */
+	call	1f
+1:	popq	%rax
+	subq	$1b, %rax
+
+	/* The new adjustment is the relocation address */
+	movq	%rbx, %rdi
+	call	adjust_got
+
 /*
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
@@ -481,19 +515,6 @@ relocated:
 	shrq	$3, %rcx
 	rep	stosq
 
-/*
- * Adjust our own GOT
- */
-	leaq	_got(%rip), %rdx
-	leaq	_egot(%rip), %rcx
-1:
-	cmpq	%rcx, %rdx
-	jae	2f
-	addq	%rbx, (%rdx)
-	addq	$8, %rdx
-	jmp	1b
-2:
-	
 /*
  * Do the extraction, and jump to the new kernel..
  */
@@ -512,6 +533,27 @@ relocated:
  */
 	jmp	*%rax
 
+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+	/* Walk through the GOT adding the address to the entries */
+	leaq	_got(%rip), %rdx
+	leaq	_egot(%rip), %rcx
+1:
+	cmpq	%rcx, %rdx
+	jae	2f
+	subq	%rax, (%rdx)	/* Undo previous adjustment */
+	addq	%rdi, (%rdx)	/* Apply the new adjustment */
+	addq	$8, %rdx
+	jmp	1b
+2:
+	ret
+
 	.code32
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [tip:x86/urgent] x86/boot/compressed/64: Fix moving page table out of trampoline memory
  2018-05-16  8:01 ` [PATCHv2 2/2] x86/boot/compressed/64: Fix moving page table out of trampoline memory Kirill A. Shutemov
  2018-05-16  9:53   ` Thomas Gleixner
@ 2018-05-16 10:36   ` tip-bot for Kirill A. Shutemov
  1 sibling, 0 replies; 10+ messages in thread
From: tip-bot for Kirill A. Shutemov @ 2018-05-16 10:36 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, hpa, kirill.shutemov, linux-kernel, hughd, mingo, torvalds, tglx

Commit-ID:  589bb62be316401603453c7d2d3c60ad8b9c3cf3
Gitweb:     https://git.kernel.org/tip/589bb62be316401603453c7d2d3c60ad8b9c3cf3
Author:     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
AuthorDate: Wed, 16 May 2018 11:01:29 +0300
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 16 May 2018 12:15:13 +0200

x86/boot/compressed/64: Fix moving page table out of trampoline memory

cleanup_trampoline() relocates the top-level page table out of
trampoline memory. We use 'top_pgtable' as our new top-level page table.

But if the 'top_pgtable' would be referenced from C in a usual way,
the address of the table will be calculated relative to RIP.
After kernel gets relocated, the address will be in the middle of
decompression buffer and the page table may get overwritten.
This leads to a crash.

We calculate the address of other page tables relative to the relocation
address. It makes them safe. We should do the same for 'top_pgtable'.

Calculate the address of 'top_pgtable' in assembly and pass down to
cleanup_trampoline().

Move the page table to .pgtable section where the rest of page tables
are. The section is @nobits so we save 4k in kernel image.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: e9d0e6330eb8 ("x86/boot/compressed/64: Prepare new top-level page table for trampoline")
Link: http://lkml.kernel.org/r/20180516080131.27913-3-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/compressed/head_64.S    | 11 +++++++++++
 arch/x86/boot/compressed/pgtable_64.c | 14 +++-----------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d17af6a4bfc9..8169e8b7a4dc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -389,10 +389,14 @@ trampoline_return:
 	/*
 	 * cleanup_trampoline() would restore trampoline memory.
 	 *
+	 * RDI is address of the page table to use instead of page table
+	 * in trampoline memory (if required).
+	 *
 	 * RSI holds real mode data and needs to be preserved across
 	 * this function call.
 	 */
 	pushq	%rsi
+	leaq	top_pgtable(%rbx), %rdi
 	call	cleanup_trampoline
 	popq	%rsi
 
@@ -691,3 +695,10 @@ boot_stack_end:
 	.balign 4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+	.fill PAGE_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 32af1cbcd903..a362fa0b849c 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -22,14 +22,6 @@ struct paging_config {
 /* Buffer to preserve trampoline memory */
 static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
 
-/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- *
- * It must not be in BSS as BSS is cleared after cleanup_trampoline().
- */
-static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
-
 /*
  * Trampoline address will be printed by extract_kernel() for debugging
  * purposes.
@@ -134,7 +126,7 @@ out:
 	return paging_config;
 }
 
-void cleanup_trampoline(void)
+void cleanup_trampoline(void *pgtable)
 {
 	void *trampoline_pgtable;
 
@@ -145,8 +137,8 @@ void cleanup_trampoline(void)
 	 * if it's there.
 	 */
 	if ((void *)__native_read_cr3() == trampoline_pgtable) {
-		memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
-		native_write_cr3((unsigned long)top_pgtable);
+		memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+		native_write_cr3((unsigned long)pgtable);
 	}
 
 	/* Restore trampoline memory */

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2018-05-16 10:37 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-16  8:01 [PATCHv2 for v4.17 0/2] Fix two crashes in the decompression code Kirill A. Shutemov
2018-05-16  8:01 ` [PATCHv2 1/2] x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline() Kirill A. Shutemov
2018-05-16  9:52   ` Thomas Gleixner
2018-05-16 10:35   ` [tip:x86/urgent] " tip-bot for Kirill A. Shutemov
2018-05-16  8:01 ` [PATCHv2 2/2] x86/boot/compressed/64: Fix moving page table out of trampoline memory Kirill A. Shutemov
2018-05-16  9:53   ` Thomas Gleixner
2018-05-16 10:36   ` [tip:x86/urgent] " tip-bot for Kirill A. Shutemov
2018-05-16  8:01 ` [PATCH 3/4] x86/boot/compressed/64: Fix trampoline page table address calculation Kirill A. Shutemov
2018-05-16  8:06   ` Kirill A. Shutemov
2018-05-16  8:01 ` [PATCH 4/4] x86/mm: Introduce 'no5lvl' kernel parameter Kirill A. Shutemov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).