LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Andy Lutomirski <luto@kernel.org>
To: x86@kernel.org, Nadav Amit <nadav.amit@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>, Rik van Riel <riel@surriel.com>,
	Jann Horn <jannh@google.com>, LKML <linux-kernel@vger.kernel.org>,
	Andy Lutomirski <luto@kernel.org>,
	stable@vger.kernel.org, Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH v2] x86/nmi: Fix some races in NMI uaccess
Date: Wed, 29 Aug 2018 08:47:18 -0700
Message-ID: <dd956eba16646fd0b15c3c0741269dfd84452dac.1535557289.git.luto@kernel.org> (raw)

In NMI context, we might be in the middle of context switching or in
the middle of switch_mm_irqs_off().  In either case, CR3 might not
match current->mm, which could cause copy_from_user_nmi() and
friends to read the wrong memory.

Fix it by adding a new nmi_uaccess_okay() helper and checking it in
copy_from_user_nmi() and in __copy_from_user_nmi()'s callers.

Cc: stable@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---

Nadav, this is intended for your series.  Want to add it right
before the use_temporary_mm() stuff?

Changes from v1:
 - Improved comments
 - Add debugging
 - Fix bugs

arch/x86/events/core.c          |  2 +-
 arch/x86/include/asm/tlbflush.h | 44 +++++++++++++++++++++++++++++++++
 arch/x86/lib/usercopy.c         |  5 ++++
 arch/x86/mm/tlb.c               |  7 ++++++
 4 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 
 	perf_callchain_store(entry, regs->ip);
 
-	if (!current->mm)
+	if (!nmi_uaccess_okay())
 		return;
 
 	if (perf_callchain_user32(regs, entry))
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 29c9da6c62fc..183d36a98b04 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,8 +175,16 @@ struct tlb_state {
 	 * are on.  This means that it may not match current->active_mm,
 	 * which will contain the previous user mm when we're in lazy TLB
 	 * mode even if we've already switched back to swapper_pg_dir.
+	 *
+	 * During switch_mm_irqs_off(), loaded_mm will be set to
+	 * LOADED_MM_SWITCHING during the brief interrupts-off window
+	 * when CR3 and loaded_mm would otherwise be inconsistent.  This
+	 * is for nmi_uaccess_okay()'s benefit.
 	 */
 	struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
 	u16 loaded_mm_asid;
 	u16 next_asid;
 	/* last user mm's ctx id */
@@ -246,6 +254,42 @@ struct tlb_state {
 };
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
 
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm.  It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	struct mm_struct *current_mm = current->mm;
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON_ONCE(!loaded_mm);
+#endif
+
+	/*
+	 * The condition we want to check is
+	 * current_mm->pgd == __va(read_cr3_pa()).  This may be slow, though,
+	 * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+	 * is supposed to be reasonably fast.
+	 *
+	 * Instead, we check the almost equivalent but somewhat conservative
+	 * condition below, and we rely on the fact that switch_mm_irqs_off()
+	 * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+	 */
+	if (loaded_mm != current_mm)
+		return false;
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+#endif
+
+	return true;
+}
+
 /* Initialize cr4 shadow for this CPU. */
 static inline void cr4_init_shadow(void)
 {
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
 #include <linux/uaccess.h>
 #include <linux/export.h>
 
+#include <asm/tlbflush.h>
+
 /*
  * We rely on the nested NMI work to allow atomic faults from the NMI path; the
  * nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	if (__range_not_ok(from, n, TASK_SIZE))
 		return n;
 
+	if (!nmi_uaccess_okay())
+		return n;
+
 	/*
 	 * Even though this function is typically called from NMI/IRQ context
 	 * disable pagefaults so that its behaviour is consistent even when
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e721cf2d4290..707d2b2a333f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -304,6 +304,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
 
+		/* Let nmi_uaccess_okay() know that we're changing CR3. */
+		this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+		barrier();
+
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -334,6 +338,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		if (next != &init_mm)
 			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
 
+		/* Make sure we write CR3 before loaded_mm. */
+		barrier();
+
 		this_cpu_write(cpu_tlbstate.loaded_mm, next);
 		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 	}
-- 
2.17.1


             reply index

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-29 15:47 Andy Lutomirski [this message]
2018-08-29 16:06 ` Rik van Riel
2018-08-29 18:56 ` Nadav Amit
2018-08-30 13:36   ` Thomas Gleixner
2018-08-30 14:21     ` Andy Lutomirski
2018-08-30 14:27       ` Thomas Gleixner
2018-08-30 14:39 ` [tip:x86/urgent] x86/nmi: Fix NMI uaccess race against CR3 switching tip-bot for Andy Lutomirski
2018-08-31 15:13 ` tip-bot for Andy Lutomirski
  -- strict thread matches above, loose matches on Subject: below --
2018-08-27 23:04 [PATCH] x86/nmi: Fix some races in NMI uaccess Andy Lutomirski
2018-08-28 17:56 ` [PATCH v2] " Rik van Riel
2018-08-29  3:46   ` Andy Lutomirski
2018-08-29 15:17     ` Rik van Riel
2018-08-29 15:36       ` Andy Lutomirski
2018-08-29 15:49         ` Rik van Riel
2018-08-29 16:14           ` Andy Lutomirski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dd956eba16646fd0b15c3c0741269dfd84452dac.1535557289.git.luto@kernel.org \
    --to=luto@kernel.org \
    --cc=bp@alien8.de \
    --cc=jannh@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nadav.amit@gmail.com \
    --cc=peterz@infradead.org \
    --cc=riel@surriel.com \
    --cc=stable@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git