* [PATCH 4.4 v2] x86/pti/efi: broken conversion from efi to kernel page table
@ 2018-01-12 20:00 Pavel Tatashin
2018-01-13 13:53 ` Greg KH
0 siblings, 1 reply; 2+ messages in thread
From: Pavel Tatashin @ 2018-01-12 20:00 UTC (permalink / raw)
To: steven.sistare, linux-kernel, tglx, mingo, hpa, x86, gregkh,
jkosina, hughd, dave.hansen, luto, torvalds
In entry_64.S we have code like this:
/* Unconditionally use kernel CR3 for do_nmi() */
/* %rax is saved above, so OK to clobber here */
ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
pushq %rax
/* mask off "user" bit of pgd address and 12 PCID bits: */
andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
movq %rax, %cr3
2:
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
call do_nmi
With this instruction:
andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
we unconditionally switch from whatever our CR3 was to the kernel page table.
But in arch/x86/platform/efi/efi_64.c we temporarily install a different page
table, one that does not have a kernel page table at a 0x1000 offset from it.
Look in efi_thunk() and efi_thunk_set_virtual_address_map().
So, if an NMI arrives while CR3 points to that other page table, the NMI
entry code clears the 0x1000 bit in CR3, resulting in a bogus CR3 if that
bit was set.
The efi page table comes from realmode/rm/trampoline_64.S:
arch/x86/realmode/rm/trampoline_64.S
141 .bss
142 .balign PAGE_SIZE
143 GLOBAL(trampoline_pgd) .space PAGE_SIZE
Notice: the alignment is only PAGE_SIZE, so the address of trampoline_pgd may
have the 0x1000 bit set. Masking off KAISER_SHADOW_PGD_OFFSET, which is equal
to PAGE_SIZE, can therefore yield the address of a different page.
But even if we fix the alignment here, the trampoline binary is later copied
into dynamically allocated memory in reserve_real_mode(), so that allocation
needs the same alignment as well.
Fixes: 8a43ddfb93a0 ("KAISER: Kernel Address Isolation")
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
---
arch/x86/include/asm/kaiser.h | 10 ++++++++++
arch/x86/realmode/init.c | 4 +++-
arch/x86/realmode/rm/trampoline_64.S | 3 ++-
3 files changed, 15 insertions(+), 2 deletions(-)
Changelog:
v1 -> v2: Fixed a compile error when CONFIG_PAGE_TABLE_ISOLATION is disabled.
diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
index 802bbbdfe143..48c791a411ab 100644
--- a/arch/x86/include/asm/kaiser.h
+++ b/arch/x86/include/asm/kaiser.h
@@ -19,6 +19,16 @@
#define KAISER_SHADOW_PGD_OFFSET 0x1000
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * A page table address must have this alignment to stay the same when
+ * KAISER_SHADOW_PGD_OFFSET mask is applied
+ */
+#define KAISER_KERNEL_PGD_ALIGNMENT (KAISER_SHADOW_PGD_OFFSET << 1)
+#else
+#define KAISER_KERNEL_PGD_ALIGNMENT PAGE_SIZE
+#endif
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_PAGE_TABLE_ISOLATION
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 0b7a63d98440..805a3271a137 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -4,6 +4,7 @@
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
#include <asm/realmode.h>
+#include <asm/kaiser.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -15,7 +16,8 @@ void __init reserve_real_mode(void)
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
/* Has to be under 1M so we can execute real-mode AP code. */
- mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+ mem = memblock_find_in_range(0, 1 << 20, size,
+ KAISER_KERNEL_PGD_ALIGNMENT);
if (!mem)
panic("Cannot allocate trampoline\n");
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index dac7b20d2f9d..781cca63f795 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -30,6 +30,7 @@
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
+#include <asm/kaiser.h>
#include "realmode.h"
.text
@@ -139,7 +140,7 @@ tr_gdt:
tr_gdt_end:
.bss
- .balign PAGE_SIZE
+ .balign KAISER_KERNEL_PGD_ALIGNMENT
GLOBAL(trampoline_pgd) .space PAGE_SIZE
.balign 8
--
1.8.3.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH 4.4 v2] x86/pti/efi: broken conversion from efi to kernel page table
2018-01-12 20:00 [PATCH 4.4 v2] x86/pti/efi: broken conversion from efi to kernel page table Pavel Tatashin
@ 2018-01-13 13:53 ` Greg KH
0 siblings, 0 replies; 2+ messages in thread
From: Greg KH @ 2018-01-13 13:53 UTC (permalink / raw)
To: Pavel Tatashin
Cc: steven.sistare, linux-kernel, tglx, mingo, hpa, x86, jkosina,
hughd, dave.hansen, luto, torvalds
On Fri, Jan 12, 2018 at 03:00:02PM -0500, Pavel Tatashin wrote:
> In entry_64.S we have code like this:
>
> /* Unconditionally use kernel CR3 for do_nmi() */
> /* %rax is saved above, so OK to clobber here */
> ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
> /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
> ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
> pushq %rax
> /* mask off "user" bit of pgd address and 12 PCID bits: */
> andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
> movq %rax, %cr3
> 2:
>
> /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
> call do_nmi
>
> With this instruction:
> andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
>
> We unconditionally switch from whatever our CR3 was to kernel page table.
> But, in arch/x86/platform/efi/efi_64.c We temporarily set a different page
> table, that does not have the kernel page table with 0x1000 offset from it.
>
> Look in efi_thunk() and efi_thunk_set_virtual_address_map().
>
> So, while CR3 points to the other page table, we get an NMI interrupt,
> and clear 0x1000 from CR3, resulting in a bogus CR3 if the 0x1000 bit was
> set.
>
> The efi page table comes from realmode/rm/trampoline_64.S:
>
> arch/x86/realmode/rm/trampoline_64.S
>
> 141 .bss
> 142 .balign PAGE_SIZE
> 143 GLOBAL(trampoline_pgd) .space PAGE_SIZE
>
> Notice: alignment is PAGE_SIZE, so after applying KAISER_SHADOW_PGD_OFFSET
> which equal to PAGE_SIZE, we can get a different page table.
>
> But, even if we fix alignment, here the trampoline binary is later copied
> into dynamically allocated memory in reserve_real_mode(), so we need to
> fix that place as well.
>
> Fixes: 8a43ddfb93a0 ("KAISER: Kernel Address Isolation")
>
> Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
> Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
> ---
> arch/x86/include/asm/kaiser.h | 10 ++++++++++
> arch/x86/realmode/init.c | 4 +++-
> arch/x86/realmode/rm/trampoline_64.S | 3 ++-
> 3 files changed, 15 insertions(+), 2 deletions(-)
>
> Changelog:
> v1 - v2: Fixed compiling issue when PTI config is disabled.
This one is now queued up, thanks.
greg k-h
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2018-01-13 13:53 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-12 20:00 [PATCH 4.4 v2] x86/pti/efi: broken conversion from efi to kernel page table Pavel Tatashin
2018-01-13 13:53 ` Greg KH
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).