* [PATCH 1/2] spurious check for user space address
@ 2018-11-27 11:16 Luming Yu
  2018-11-27 11:16 ` [PATCH 2/2] spurious check trace point Luming Yu
From: Luming Yu @ 2018-11-27 11:16 UTC
  To: linux-kernel

We saw a suspected spurious fault that failed the VMA access
check but passed the spurious page table check, for a user
space address access of (rw, exec). This patch tries to catch
that case, which might indicate a stale TLB (i.e. a software
bug) and thus a harmful spurious fault.

Signed-off-by: Luming Yu <luming.yu@intel.com>
Signed-off-by: Yongkai Wu <yongkaiwu@tencent.com>
---
 arch/x86/mm/fault.c | 95 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 41 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 71d4b9d4d43f..5e2a49010d87 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1004,29 +1004,7 @@ static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
 	return 1;
 }
 
-/*
- * Handle a spurious fault caused by a stale TLB entry.
- *
- * This allows us to lazily refresh the TLB when increasing the
- * permissions of a kernel page (RO -> RW or NX -> X).  Doing it
- * eagerly is very expensive since that implies doing a full
- * cross-processor TLB flush, even if no stale TLB entries exist
- * on other processors.
- *
- * Spurious faults may only occur if the TLB contains an entry with
- * fewer permission than the page table entry.  Non-present (P = 0)
- * and reserved bit (R = 1) faults are never spurious.
- *
- * There are no security implications to leaving a stale TLB when
- * increasing the permissions on a page.
- *
- * Returns non-zero if a spurious fault was handled, zero otherwise.
- *
- * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
- * (Optional Invalidation).
- */
-static noinline int
-spurious_kernel_fault(unsigned long error_code, unsigned long address)
+static int spurious_page_table_check(unsigned long error_code, unsigned long address)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -1035,19 +1013,6 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
 	pte_t *pte;
 	int ret;
 
-	/*
-	 * Only writes to RO or instruction fetches from NX may cause
-	 * spurious faults.
-	 *
-	 * These could be from user or supervisor accesses but the TLB
-	 * is only lazily flushed after a kernel mapping protection
-	 * change, so user accesses are not expected to cause spurious
-	 * faults.
-	 */
-	if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
-	    error_code != (X86_PF_INSTR | X86_PF_PROT))
-		return 0;
-
 	pgd = init_mm.pgd + pgd_index(address);
 	if (!pgd_present(*pgd))
 		return 0;
@@ -1090,12 +1055,52 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
 
 	return ret;
 }
+
+/*
+ * Handle a spurious fault caused by a stale TLB entry.
+ *
+ * This allows us to lazily refresh the TLB when increasing the
+ * permissions of a kernel page (RO -> RW or NX -> X).  Doing it
+ * eagerly is very expensive since that implies doing a full
+ * cross-processor TLB flush, even if no stale TLB entries exist
+ * on other processors.
+ *
+ * Spurious faults may only occur if the TLB contains an entry with
+ * fewer permissions than the page table entry.  Non-present (P = 0)
+ * and reserved bit (R = 1) faults are never spurious.
+ *
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ *
+ * Returns non-zero if a spurious fault was handled, zero otherwise.
+ *
+ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
+ * (Optional Invalidation).
+ */
+static noinline int
+spurious_kernel_fault(unsigned long error_code, unsigned long address)
+{
+	/*
+	 * Only writes to RO or instruction fetches from NX may cause
+	 * spurious faults.
+	 *
+	 * These could be from user or supervisor accesses but the TLB
+	 * is only lazily flushed after a kernel mapping protection
+	 * change, so user accesses are not expected to cause spurious
+	 * faults.
+	 */
+	if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+	    error_code != (X86_PF_INSTR | X86_PF_PROT))
+		return 0;
+
+	return spurious_page_table_check(error_code, address);
+}
 NOKPROBE_SYMBOL(spurious_kernel_fault);
 
 int show_unhandled_signals = 1;
 
 static inline int
-access_error(unsigned long error_code, struct vm_area_struct *vma)
+access_error(unsigned long error_code, unsigned long address, struct vm_area_struct *vma)
 {
 	/* This is only called for the current mm, so: */
 	bool foreign = false;
@@ -1120,19 +1125,27 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	if (error_code & X86_PF_WRITE) {
 		/* write, present and write, not present: */
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
-			return 1;
+			goto maybe_spurious;
 		return 0;
 	}
 
 	/* read, present: */
 	if (unlikely(error_code & X86_PF_PROT))
-		return 1;
+		goto maybe_spurious;
 
 	/* read, not present: */
 	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
-		return 1;
+		goto maybe_spurious;
 
 	return 0;
+
+maybe_spurious:
+	if (spurious_page_table_check(error_code, address)) {
+		WARN_ONCE(1, "spurious fault? Failed VMA check, passed page table check!\n");
+		return 0;
+	}
+
+	return 1;
 }
 
 static int fault_in_kernel_space(unsigned long address)
@@ -1402,7 +1415,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	 * we can handle it..
 	 */
 good_area:
-	if (unlikely(access_error(sw_error_code, vma))) {
+	if (unlikely(access_error(sw_error_code, address, vma))) {
 		bad_area_access_error(regs, sw_error_code, address, vma);
 		return;
 	}
-- 
2.14.4
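
The reworked access_error() expresses the retry path with a goto label;
the equivalent straight-line logic may be easier to follow.  Here is a
minimal sketch (access_error_sketch is a made-up name, and the pkey and
foreign-access handling that the real function performs before these
checks is omitted):

static inline int access_error_sketch(unsigned long error_code,
				      unsigned long address,
				      struct vm_area_struct *vma)
{
	int denied;

	if (error_code & X86_PF_WRITE)
		/* write, present and write, not present: */
		denied = !(vma->vm_flags & VM_WRITE);
	else if (error_code & X86_PF_PROT)
		/* read, present: */
		denied = 1;
	else
		/* read, not present: */
		denied = !(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE));

	/*
	 * New with this patch: before reporting a bad access, consult the
	 * page tables once more.  If they would have allowed the access,
	 * the fault is suspected to be spurious: warn once and retry the
	 * faulting instruction instead of raising SIGSEGV.
	 */
	if (denied && spurious_page_table_check(error_code, address)) {
		WARN_ONCE(1, "spurious fault? Failed VMA check, passed page table check!\n");
		return 0;
	}

	return denied;
}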



* [PATCH 2/2] spurious check trace point
  2018-11-27 11:16 [PATCH 1/2] spurious check for user space address Luming Yu
@ 2018-11-27 11:16 ` Luming Yu
From: Luming Yu @ 2018-11-27 11:16 UTC
  To: linux-kernel

Add a trace point for debugging the spurious fault problem.

Signed-off-by: Luming Yu <luming.yu@intel.com>
Signed-off-by: Yongkai Wu <yongkaiwu@tencent.com>
---
 arch/x86/include/asm/trace/exceptions.h | 30 ++++++++++++++++++++++++++++++
 arch/x86/mm/fault.c                     |  2 ++
 2 files changed, 32 insertions(+)

diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 69615e387973..0cae50c5fcb2 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -44,6 +44,36 @@ DEFINE_EVENT_FN(x86_exceptions, name,				\
 DEFINE_PAGE_FAULT_EVENT(page_fault_user);
 DEFINE_PAGE_FAULT_EVENT(page_fault_kernel);
 
+DECLARE_EVENT_CLASS(spurious_fault_check,
+
+	TP_PROTO(unsigned long error_code,
+		unsigned long pte),
+	TP_ARGS(error_code,
+		pte),
+	TP_STRUCT__entry(
+		__field(unsigned long, error_code)
+		__field(unsigned long, pte)
+	),
+	TP_fast_assign(
+		__entry->error_code = error_code;
+		__entry->pte = pte;
+	),
+	TP_printk("error_code=%lx pte=%lx",
+		 __entry->error_code,
+		 __entry->pte
+		)
+);
+
+#define DEFINE_SPURIOUS_FAULT_CHECK_EVENT(name)		\
+DEFINE_EVENT_FN(spurious_fault_check, name,			\
+	TP_PROTO(unsigned long error_code,			\
+		 unsigned long pte),				\
+	TP_ARGS(error_code, pte),				\
+	trace_pagefault_reg,					\
+	trace_pagefault_unreg);
+
+DEFINE_SPURIOUS_FAULT_CHECK_EVENT(spurious_fault_1_sample);
+
 #undef TRACE_INCLUDE_PATH
 #define TRACE_INCLUDE_PATH .
 #define TRACE_INCLUDE_FILE exceptions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5e2a49010d87..aa9bc9609c68 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1001,6 +1001,8 @@ static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
 	if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
 		return 0;
 
+	trace_spurious_fault_1_sample(error_code, pte_val(*pte));
+
 	return 1;
 }
 
-- 
2.14.4
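
Since the trace point is built on one event class, further sample points
are cheap to add while chasing this bug.  A sketch with a made-up event
name (spurious_fault_2_sample is not part of this patch):

/* in arch/x86/include/asm/trace/exceptions.h */
DEFINE_SPURIOUS_FAULT_CHECK_EVENT(spurious_fault_2_sample);

/* at whichever other check in arch/x86/mm/fault.c is of interest */
trace_spurious_fault_2_sample(error_code, pte_val(*pte));

At run time the event added here should appear in tracefs as
events/exceptions/spurious_fault_1_sample (this header sets TRACE_SYSTEM
to exceptions) and can be toggled through its enable file like the
existing page_fault_user/page_fault_kernel events.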


