From: Andy Lutomirski <luto@kernel.org>
To: x86@kernel.org, Nadav Amit <nadav.amit@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>, Rik van Riel <riel@surriel.com>,
Jann Horn <jannh@google.com>, LKML <linux-kernel@vger.kernel.org>,
Andy Lutomirski <luto@kernel.org>,
stable@vger.kernel.org, Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH v2] x86/nmi: Fix some races in NMI uaccess
Date: Wed, 29 Aug 2018 08:47:18 -0700
Message-ID: <dd956eba16646fd0b15c3c0741269dfd84452dac.1535557289.git.luto@kernel.org> (raw)
In NMI context, we might be in the middle of context switching or in
the middle of switch_mm_irqs_off(). In either case, CR3 might not
match current->mm, which could cause copy_from_user_nmi() and
friends to read the wrong memory.
Fix it by adding a new nmi_uaccess_okay() helper and checking it in
copy_from_user_nmi() and in __copy_from_user_nmi()'s callers.
Cc: stable@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
Nadav, this is intended for your series. Want to add it right
before the use_temporary_mm() stuff?
Changes from v1:
- Improved comments
- Add debugging
- Fix bugs
arch/x86/events/core.c | 2 +-
arch/x86/include/asm/tlbflush.h | 44 +++++++++++++++++++++++++++++++++
arch/x86/lib/usercopy.c | 5 ++++
arch/x86/mm/tlb.c | 7 ++++++
4 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
perf_callchain_store(entry, regs->ip);
- if (!current->mm)
+ if (!nmi_uaccess_okay())
return;
if (perf_callchain_user32(regs, entry))
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 29c9da6c62fc..183d36a98b04 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,8 +175,16 @@ struct tlb_state {
* are on. This means that it may not match current->active_mm,
* which will contain the previous user mm when we're in lazy TLB
* mode even if we've already switched back to swapper_pg_dir.
+ *
+ * During switch_mm_irqs_off(), loaded_mm will be set to
+ * LOADED_MM_SWITCHING during the brief interrupts-off window
+ * when CR3 and loaded_mm would otherwise be inconsistent. This
+ * is for nmi_uaccess_okay()'s benefit.
*/
struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
u16 loaded_mm_asid;
u16 next_asid;
/* last user mm's ctx id */
@@ -246,6 +254,42 @@ struct tlb_state {
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ struct mm_struct *current_mm = current->mm;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON_ONCE(!loaded_mm);
+#endif
+
+ /*
+ * The condition we want to check is
+ * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
+ * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+ * is supposed to be reasonably fast.
+ *
+ * Instead, we check the almost equivalent but somewhat conservative
+ * condition below, and we rely on the fact that switch_mm_irqs_off()
+ * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+ */
+ if (loaded_mm != current_mm)
+ return false;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+#endif
+
+ return true;
+}
+
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <asm/tlbflush.h>
+
/*
* We rely on the nested NMI work to allow atomic faults from the NMI path; the
* nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
if (__range_not_ok(from, n, TASK_SIZE))
return n;
+ if (!nmi_uaccess_okay())
+ return n;
+
/*
* Even though this function is typically called from NMI/IRQ context
* disable pagefaults so that its behaviour is consistent even when
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e721cf2d4290..707d2b2a333f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -304,6 +304,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ /* Let nmi_uaccess_okay() know that we're changing CR3. */
+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ barrier();
+
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -334,6 +338,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (next != &init_mm)
this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+ /* Make sure we write CR3 before loaded_mm. */
+ barrier();
+
this_cpu_write(cpu_tlbstate.loaded_mm, next);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
}
--
2.17.1
next reply index
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-08-29 15:47 Andy Lutomirski [this message]
2018-08-29 16:06 ` Rik van Riel
2018-08-29 18:56 ` Nadav Amit
2018-08-30 13:36 ` Thomas Gleixner
2018-08-30 14:21 ` Andy Lutomirski
2018-08-30 14:27 ` Thomas Gleixner
2018-08-30 14:39 ` [tip:x86/urgent] x86/nmi: Fix NMI uaccess race against CR3 switching tip-bot for Andy Lutomirski
2018-08-31 15:13 ` tip-bot for Andy Lutomirski
-- strict thread matches above, loose matches on Subject: below --
2018-08-27 23:04 [PATCH] x86/nmi: Fix some races in NMI uaccess Andy Lutomirski
2018-08-28 17:56 ` [PATCH v2] " Rik van Riel
2018-08-29 3:46 ` Andy Lutomirski
2018-08-29 15:17 ` Rik van Riel
2018-08-29 15:36 ` Andy Lutomirski
2018-08-29 15:49 ` Rik van Riel
2018-08-29 16:14 ` Andy Lutomirski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=dd956eba16646fd0b15c3c0741269dfd84452dac.1535557289.git.luto@kernel.org \
--to=luto@kernel.org \
--cc=bp@alien8.de \
--cc=jannh@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=nadav.amit@gmail.com \
--cc=peterz@infradead.org \
--cc=riel@surriel.com \
--cc=stable@vger.kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
LKML Archive on lore.kernel.org
Archives are clonable:
git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
linux-kernel@vger.kernel.org
public-inbox-index lkml
Example config snippet for mirrors
Newsgroup available over NNTP:
nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git