From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: Ingo Molnar <mingo@redhat.com>,
x86@kernel.org, Thomas Gleixner <tglx@linutronix.de>,
"H. Peter Anvin" <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
Andy Lutomirski <luto@amacapital.net>,
Cyrill Gorcunov <gorcunov@openvz.org>,
Borislav Petkov <bp@suse.de>, Andi Kleen <ak@linux.intel.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv2 4/4] x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
Date: Sat, 11 Nov 2017 01:06:45 +0300 [thread overview]
Message-ID: <20171110220645.59944-5-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20171110220645.59944-1-kirill.shutemov@linux.intel.com>
This patch addresses a shortcoming in the current boot process on
machines that support 5-level paging.
If the bootloader enables 64-bit mode with 4-level paging, we need to
switch over to 5-level paging. The switch requires disabling paging.
This works fine if the kernel itself is loaded below 4G.
If the bootloader puts the kernel above 4G (not sure if anybody does
this), we would lose control as soon as paging is disabled, because the
code becomes unreachable.
This patch implements a trampoline in lower memory to handle this
situation.
We only need the memory for a very short time, until the main kernel
image sets up its own page tables.
We go through the trampoline even if we don't have to: if we're already
in 5-level paging mode or if we don't need to switch to it. This way the
trampoline code gets tested not only in the special rare case, but on
every boot.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
arch/x86/boot/compressed/head_64.S | 72 +++++++++++++++++++++++---------------
1 file changed, 43 insertions(+), 29 deletions(-)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 33a47d5c6445..525972ca27b7 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+#include "pgtable.h"
/*
* Locally defined symbols should be marked hidden:
@@ -339,31 +340,22 @@ ENTRY(startup_64)
call paging_prepare
popq %rsi
movq %rax, %rcx
- andq $(~1UL), %rcx
-
- testq $1, %rax
- jz lvl5
-
- /* Clear additional page table */
- leaq lvl5_pgtable(%rbx), %rdi
- xorq %rax, %rax
- movq $(PAGE_SIZE/8), %rcx
- rep stosq
/*
- * Setup current CR3 as the first and only entry in a new top level
- * page table.
+ * Load address of trampoline_return into RDI.
+ * It will be used by trampoline to return to main code.
*/
- movq %cr3, %rdi
- leaq 0x7 (%rdi), %rax
- movq %rax, lvl5_pgtable(%rbx)
+ leaq trampoline_return(%rip), %rdi
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
pushq $__KERNEL32_CS
- leaq compatible_mode(%rip), %rax
+ andq $(~1UL), %rax /* Clear bit 0: encode if 5-level paging neeeded */
+ leaq TRAMPOLINE_32BIT_CODE_OFF(%rax), %rax
pushq %rax
lretq
-lvl5:
+trampoline_return:
+ /* Restore stack, 32-bit trampoline uses own stack */
+ leaq boot_stack_end(%rbx), %rsp
/* Zero EFLAGS */
pushq $0
@@ -501,36 +493,51 @@ relocated:
jmp *%rax
.code32
+/*
+ * This is 32-bit trampoline that will be copied over to low memory.
+ *
+ * RDI contains return address (might be above 4G).
+ * ECX contains the base address of trampoline memory.
+ * Bit 0 of ECX encodes if 5-level paging is required.
+ */
ENTRY(trampoline_32bit_src)
-compatible_mode:
/* Setup data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %ss
+ movl %ecx, %edx
+ andl $(~1UL), %edx
+
+ /* Setup new stack at the end of trampoline memory */
+ leal TRAMPOLINE_32BIT_STACK_END (%edx), %esp
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
- /* Point CR3 to 5-level paging */
- leal lvl5_pgtable(%ebx), %eax
+ /* Point CR3 to trampoline top level page table */
+ leal TRAMPOLINE_32BIT_PGTABLE_OFF (%edx), %eax
movl %eax, %cr3
/* Enable PAE and LA57 mode */
movl %cr4, %eax
- orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
+ orl $X86_CR4_PAE, %eax
+
+ /* Bit 0 of ECX encodes if 5-level paging is required */
+ testl $1, %ecx
+ jz 1f
+ orl $X86_CR4_LA57, %eax
+1:
movl %eax, %cr4
- /* Calculate address we are running at */
- call 1f
-1: popl %edi
- subl $1b, %edi
+ /* Calculate address of paging_enabled once we are in trampoline */
+ leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFF (%edx), %eax
/* Prepare stack for far return to Long Mode */
pushl $__KERNEL_CS
- leal lvl5(%edi), %eax
- push %eax
+ pushl %eax
/* Enable paging back */
movl $(X86_CR0_PG | X86_CR0_PE), %eax
@@ -538,6 +545,15 @@ compatible_mode:
lret
+ .code64
+paging_enabled:
+ /* Return from trampoline */
+ jmp *%rdi
+
+ /* Bound size of trampoline code */
+ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+
+ .code32
no_longmode:
/* This isn't an x86-64 CPU so hang */
1:
@@ -595,5 +611,3 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
-lvl5_pgtable:
- .fill PAGE_SIZE, 1, 0
--
2.14.2
next prev parent reply other threads:[~2017-11-10 22:07 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-10 22:06 [PATCHv2 0/4] x86: 5-level related changes into decompression code Kirill A. Shutemov
2017-11-10 22:06 ` [PATCHv2 1/4] x86/boot/compressed/64: Rename pagetable.c to kaslr_64.c Kirill A. Shutemov
2017-11-10 22:06 ` [PATCHv2 2/4] x86/boot/compressed/64: Detect and handle 5-level paging at boot-time Kirill A. Shutemov
2017-11-10 22:06 ` [PATCHv2 3/4] x86/boot/compressed/64: Introduce place_trampoline() Kirill A. Shutemov
2017-11-10 22:06 ` Kirill A. Shutemov [this message]
2017-11-22 8:09 ` [PATCHv2 0/4] x86: 5-level related changes into decompression code Kirill A. Shutemov
2017-11-29 15:49 ` Borislav Petkov
2017-11-29 16:13 ` Kirill A. Shutemov
2017-11-29 16:40 ` Thomas Gleixner
2017-11-29 17:08 ` Kirill A. Shutemov
2017-11-29 17:48 ` Borislav Petkov
2017-11-29 19:01 ` H. Peter Anvin
2017-11-29 19:19 ` Borislav Petkov
2017-11-29 21:33 ` H. Peter Anvin
2017-11-29 22:31 ` Borislav Petkov
2017-11-29 23:24 ` H. Peter Anvin
2017-11-30 1:27 ` Konrad Rzeszutek Wilk
2017-11-30 10:12 ` Borislav Petkov
2017-11-30 7:31 ` Kirill A. Shutemov
2017-11-30 10:14 ` Borislav Petkov
2017-11-30 15:45 ` Joe Perches
2017-11-29 20:58 ` Andi Kleen
2017-11-29 21:03 ` hpa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171110220645.59944-5-kirill.shutemov@linux.intel.com \
--to=kirill.shutemov@linux.intel.com \
--cc=ak@linux.intel.com \
--cc=bp@suse.de \
--cc=gorcunov@openvz.org \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=luto@amacapital.net \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).