From: Andy Lutomirski <luto@kernel.org>
To: x86@kernel.org
Cc: linux-kernel@vger.kernel.org, Borislav Petkov <bp@alien8.de>,
Brian Gerst <brgerst@gmail.com>,
David Laight <David.Laight@aculab.com>,
Kees Cook <keescook@chromium.org>,
Peter Zijlstra <peterz@infradead.org>,
Andy Lutomirski <luto@kernel.org>
Subject: [PATCH PTI v3 07/10] x86/pti: Map the vsyscall page if needed
Date: Tue, 12 Dec 2017 07:56:42 -0800 [thread overview]
Message-ID: <c8d899350d1c897da44fd4da8841fe40109cf544.1513035461.git.luto@kernel.org> (raw)
In-Reply-To: <24c898b4f44fdf8c22d93703850fb384ef87cfdc.1513035461.git.luto@kernel.org>
In-Reply-To: <cover.1513035461.git.luto@kernel.org>
Make VSYSCALLs work fully in PTI mode.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
arch/x86/entry/vsyscall/vsyscall_64.c | 6 ++--
arch/x86/include/asm/pgtable.h | 6 +++-
arch/x86/include/asm/pgtable_64.h | 9 +++--
arch/x86/include/asm/vsyscall.h | 1 +
arch/x86/mm/pti.c | 63 +++++++++++++++++++++++++++++++++++
5 files changed, 78 insertions(+), 7 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index a06f2ae09ad6..e4a6fe8354f0 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -343,14 +343,14 @@ int in_gate_area_no_mm(unsigned long addr)
* vsyscalls but leave the page not present. If so, we skip calling
* this.
*/
-static void __init set_vsyscall_pgtable_user_bits(void)
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
- pgd = pgd_offset_k(VSYSCALL_ADDR);
+ pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
pgd->pgd |= _PAGE_USER;
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
#if CONFIG_PGTABLE_LEVELS >= 5
@@ -372,7 +372,7 @@ void __init map_vsyscall(void)
vsyscall_mode == NATIVE
? PAGE_KERNEL_VSYSCALL
: PAGE_KERNEL_VVAR);
- set_vsyscall_pgtable_user_bits();
+ set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 83c0c77e7365..a8a8fc15ca16 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -920,7 +920,11 @@ static inline int pgd_none(pgd_t pgd)
* pgd_offset() returns a (pgd_t *)
* pgd_index() is used get the offset into the pgd page's array of pgd_t's;
*/
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+/*
+ * pgd_offset_pgd() returns a (pgd_t *) for @address within the top-level
+ * table @pgd.  @pgd is parenthesized so that any pointer-valued expression
+ * may be passed without operator-precedence surprises.
+ */
+#define pgd_offset_pgd(pgd, address) ((pgd) + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
/*
* a shortcut which implies the use of the kernel's pgd, instead
* of a process's
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index be8d086de927..a2fb3f8bc985 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -220,11 +220,14 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
* the wrong CR3.
*
* As exceptions, we don't set NX if:
- * - this is EFI or similar, the kernel may execute from it
+ * - _PAGE_USER is not set. This could be an executable
+ * EFI runtime mapping or something similar, and the kernel
+ * may execute from it
* - we don't have NX support
- * - we're clearing the PGD (i.e. pgd.pgd == 0).
+ * - we're clearing the PGD (i.e. the new pgd is not present).
*/
- if ((pgd.pgd & _PAGE_USER) && (__supported_pte_mask & _PAGE_NX))
+ if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
+ (__supported_pte_mask & _PAGE_NX))
pgd.pgd |= _PAGE_NX;
} else {
/*
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d9a7c659009c..b986b2ca688a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
/*
* Called on instruction fetch fault in vsyscall page.
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index e01c4aa3ec73..b984e2311969 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -38,6 +38,7 @@
#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
+#include <asm/vsyscall.h>
#include <asm/cmdline.h>
#include <asm/pti.h>
#include <asm/pgtable.h>
@@ -145,6 +146,48 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
return pmd_offset(pud, address);
}
+/*
+ * Walk the shadow copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.  Does not support large pages.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables.  It is never used for userspace data.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.  Failure
+ * covers: the PMD-level walk yielding no PMD, the PMD being a large
+ * mapping, a PTE page allocation failure, and the target PTE already
+ * carrying _PAGE_USER.
+ */
+static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+	pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+	pte_t *pte;
+
+	/*
+	 * Defensive: the PMD-level walk may fail and hand back NULL (its
+	 * body is not fully visible in this hunk), so check before the
+	 * dereferences below.
+	 */
+	if (!pmd)
+		return NULL;
+
+	/* We can't do anything sensible if we hit a large mapping. */
+	if (pmd_large(*pmd)) {
+		WARN_ON(1);
+		return NULL;
+	}
+
+	if (pmd_none(*pmd)) {
+		unsigned long new_pte_page = __get_free_page(gfp);
+
+		if (!new_pte_page)
+			return NULL;
+
+		/*
+		 * Re-check after the allocation: install the fresh, zeroed
+		 * PTE page only if the slot is still empty, otherwise give
+		 * the page back.
+		 */
+		if (pmd_none(*pmd))
+			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+		else
+			free_page(new_pte_page);
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+	/* Refuse to hand out a PTE that is already marked as a user mapping. */
+	if (pte_flags(*pte) & _PAGE_USER) {
+		WARN_ONCE(1, "attempt to walk to user pte\n");
+		return NULL;
+	}
+	return pte;
+}
+
static void __init
pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
{
@@ -205,6 +248,25 @@ static void __init pti_setup_espfix64(void)
#endif
}
+/*
+ * Mirror the kernel's vsyscall page mapping into the user page tables.
+ *
+ * Looks up the kernel PTE for VSYSCALL_ADDR, copies it into the matching
+ * slot of the user/shadow page tables and then sets the user bits along
+ * the user-side hierarchy.  Does nothing when vsyscall emulation is
+ * compiled out, when there is no 4K kernel PTE for the address, or when
+ * that PTE is empty (the vsyscall page was never mapped).
+ */
+static void __init pti_setup_vsyscall(void)
+{
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+	pte_t *pte, *target_pte;
+	unsigned int level;
+
+	pte = lookup_address(VSYSCALL_ADDR, &level);
+	/*
+	 * An empty PTE means the kernel did not map the vsyscall page, so
+	 * there is nothing to copy and the user bits must not be set
+	 * either, matching the kernel-side setup which skips them too.
+	 */
+	if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
+		return;
+
+	target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+	if (WARN_ON(!target_pte))
+		return;
+
+	*target_pte = *pte;
+	set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
+#endif
+}
+
/*
* Clone the populated PMDs of the user shared fixmaps into the user space
* visible page table.
@@ -244,4 +306,5 @@ void __init pti_init(void)
pti_clone_user_shared();
pti_clone_entry_text();
pti_setup_espfix64();
+ pti_setup_vsyscall();
}
--
2.13.6
next prev parent reply other threads:[~2017-12-12 15:58 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-11 23:40 [PATCH PTI v3 00/10] Clean up pgd handling and fix VSYSCALL and LDT Andy Lutomirski
2017-12-12 10:09 ` Ingo Molnar
2017-12-12 15:58 ` Andy Lutomirski
2017-12-12 16:13 ` Borislav Petkov
2017-12-12 16:14 ` Juergen Gross
2017-12-12 16:20 ` Borislav Petkov
2017-12-12 15:56 ` [PATCH PTI v3 01/10] x86/espfix/64: Fix espfix double-fault handling on 5-level systems Andy Lutomirski
2017-12-12 17:18 ` Kirill A. Shutemov
2017-12-15 18:34 ` [tip:x86/urgent] " tip-bot for Andy Lutomirski
2017-12-12 15:56 ` [PATCH PTI v3 02/10] x86/pti: Vastly simplify pgd synchronization Andy Lutomirski
2017-12-12 15:56 ` [PATCH PTI v3 03/10] x86/pti/64: Fix ESPFIX64 user mapping Andy Lutomirski
2017-12-13 13:12 ` Kirill A. Shutemov
2017-12-13 17:01 ` Andy Lutomirski
2017-12-14 14:10 ` Kirill A. Shutemov
2017-12-14 16:18 ` Andy Lutomirski
2017-12-12 15:56 ` [PATCH PTI v3 04/10] Revert "x86/mm/pti: Disable native VSYSCALL" Andy Lutomirski
2017-12-12 15:56 ` [PATCH PTI v3 05/10] x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy Andy Lutomirski
2017-12-12 15:56 ` [PATCH PTI v3 06/10] x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE mode Andy Lutomirski
2017-12-12 15:56 ` Andy Lutomirski [this message]
2017-12-12 15:56 ` [PATCH PTI v3 08/10] x86/mm/64: Improve the memory map documentation Andy Lutomirski
2017-12-12 15:56 ` [PATCH PTI v3 09/10] x86/mm/64: Make a full PGD-entry size hole in the memory map Andy Lutomirski
2017-12-13 13:17 ` Kirill A. Shutemov
2017-12-13 17:04 ` Andy Lutomirski
2017-12-12 15:56 ` [PATCH PTI v3 10/10] x86/pti: Put the LDT in its own PGD if PTI is on Andy Lutomirski
2017-12-15 22:54 ` Thomas Gleixner
2017-12-16 0:39 ` Thomas Gleixner
2017-12-16 6:41 ` Andy Lutomirski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c8d899350d1c897da44fd4da8841fe40109cf544.1513035461.git.luto@kernel.org \
--to=luto@kernel.org \
--cc=David.Laight@aculab.com \
--cc=bp@alien8.de \
--cc=brgerst@gmail.com \
--cc=keescook@chromium.org \
--cc=linux-kernel@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).