linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v6 1/4] x86/insn-eval: Add support for 64-bit kernel mode
@ 2019-12-09 14:31 Jann Horn
  2019-12-09 14:31 ` [PATCH v6 2/4] x86/traps: Print address on #GP Jann Horn
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Jann Horn @ 2019-12-09 14:31 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	kasan-dev, jannh
  Cc: linux-kernel, Andrey Konovalov, Andy Lutomirski, Sean Christopherson

To support evaluating 64-bit kernel mode instructions:

Replace existing checks for user_64bit_mode() with a new helper that
checks whether code is being executed in either 64-bit kernel mode or
64-bit user mode.

Select the GS base depending on whether the instruction is being
evaluated in kernel mode.

Signed-off-by: Jann Horn <jannh@google.com>
---

Notes:
    v2-v6:
      no changes

 arch/x86/include/asm/ptrace.h | 13 +++++++++++++
 arch/x86/lib/insn-eval.c      | 26 +++++++++++++++-----------
 2 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 5057a8ed100b..ac45b06941a5 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
+/*
+ * Determine whether the register set came from any context that is running in
+ * 64-bit mode.
+ */
+static inline bool any_64bit_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+	return !user_mode(regs) || user_64bit_mode(regs);
+#else
+	return false;
+#endif
+}
+
 #ifdef CONFIG_X86_64
 #define current_user_stack_pointer()	current_pt_regs()->sp
 #define compat_user_stack_pointer()	current_pt_regs()->sp
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 306c3a0902ba..31600d851fd8 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff)
  */
 static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
 {
-	if (user_64bit_mode(regs))
+	if (any_64bit_mode(regs))
 		return INAT_SEG_REG_IGNORE;
 	/*
 	 * Resolve the default segment register as described in Section 3.7.4
@@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
 	 * which may be invalid at this point.
 	 */
 	if (regoff == offsetof(struct pt_regs, ip)) {
-		if (user_64bit_mode(regs))
+		if (any_64bit_mode(regs))
 			return INAT_SEG_REG_IGNORE;
 		else
 			return INAT_SEG_REG_CS;
@@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
 	 * In long mode, segment override prefixes are ignored, except for
 	 * overrides for FS and GS.
 	 */
-	if (user_64bit_mode(regs)) {
+	if (any_64bit_mode(regs)) {
 		if (idx != INAT_SEG_REG_FS &&
 		    idx != INAT_SEG_REG_GS)
 			idx = INAT_SEG_REG_IGNORE;
@@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
 		 */
 		return (unsigned long)(sel << 4);
 
-	if (user_64bit_mode(regs)) {
+	if (any_64bit_mode(regs)) {
 		/*
 		 * Only FS or GS will have a base address, the rest of
 		 * the segments' bases are forced to 0.
 		 */
 		unsigned long base;
 
-		if (seg_reg_idx == INAT_SEG_REG_FS)
+		if (seg_reg_idx == INAT_SEG_REG_FS) {
 			rdmsrl(MSR_FS_BASE, base);
-		else if (seg_reg_idx == INAT_SEG_REG_GS)
+		} else if (seg_reg_idx == INAT_SEG_REG_GS) {
 			/*
 			 * swapgs was called at the kernel entry point. Thus,
 			 * MSR_KERNEL_GS_BASE will have the user-space GS base.
 			 */
-			rdmsrl(MSR_KERNEL_GS_BASE, base);
-		else
+			if (user_mode(regs))
+				rdmsrl(MSR_KERNEL_GS_BASE, base);
+			else
+				rdmsrl(MSR_GS_BASE, base);
+		} else {
 			base = 0;
+		}
 		return base;
 	}
 
@@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
 	if (sel < 0)
 		return 0;
 
-	if (user_64bit_mode(regs) || v8086_mode(regs))
+	if (any_64bit_mode(regs) || v8086_mode(regs))
 		return -1L;
 
 	if (!sel)
@@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs,
 	 * following instruction.
 	 */
 	if (*regoff == -EDOM) {
-		if (user_64bit_mode(regs))
+		if (any_64bit_mode(regs))
 			tmp = regs->ip + insn->length;
 		else
 			tmp = 0;
@@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs)
 	 * After computed, the effective address is treated as an unsigned
 	 * quantity.
 	 */
-	if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
+	if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
 		goto out;
 
 	/*
-- 
2.24.0.393.g34dc348eaf-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v6 2/4] x86/traps: Print address on #GP
  2019-12-09 14:31 [PATCH v6 1/4] x86/insn-eval: Add support for 64-bit kernel mode Jann Horn
@ 2019-12-09 14:31 ` Jann Horn
  2019-12-11 17:06   ` Borislav Petkov
  2019-12-09 14:31 ` [PATCH v6 3/4] x86/dumpstack: Split out header line printing from __die() Jann Horn
  2019-12-09 14:31 ` [PATCH v6 4/4] x86/kasan: Print original address on #GP Jann Horn
  2 siblings, 1 reply; 12+ messages in thread
From: Jann Horn @ 2019-12-09 14:31 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	kasan-dev, jannh
  Cc: linux-kernel, Andrey Konovalov, Andy Lutomirski, Sean Christopherson

A frequent cause of #GP exceptions are memory accesses to non-canonical
addresses. Unlike #PF, #GP doesn't come with a fault address in CR2, so
the kernel doesn't currently print the fault address for #GP.
Luckily, we already have the necessary infrastructure for decoding X86
instructions and computing the memory address that is being accessed;
hook it up to the #GP handler so that we can figure out the address
operand of the faulting instruction and print it.

Distinguish two cases:
  a) (Part of) the memory range being accessed lies in the non-canonical
     address range; in this case, is is likely that the address we
     decoded is actually the one that caused the #GP.
  b) The entire memory range of the operand we decoded lies in canonical
     address space; the #GP may or may not be related in some way to the
     address we computed. We'll still print it, but with hedging
     language in the message.

While it is already possible to compute the faulting address manually by
disassembling the opcode dump and evaluating the instruction against the
register dump, this should make it slightly easier to identify crashes
at a glance.

Note that the operand length, which we get from the instruction decoder
and use to determine whether the access straddles into non-canonical
address space, is currently somewhat unreliable; but it should be good
enough, considering that Linux on x86-64 never maps the page directly
before the start of the non-canonical range anyway, and therefore the
case where a memory range begins in that page and potentially straddles
into the non-canonical range should be fairly uncommon.
And if we do get this wrong, it only influences whether the error
message claims that the access is canonical.

Reviewed-and-tested-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Jann Horn <jannh@google.com>
---

Notes:
    v2:
     - print different message for segment-related GP (Borislav)
     - rewrite check for non-canonical address (Sean)
     - make it clear we don't know for sure why the GP happened (Andy)
    v3:
     - change message format to one line (Borislav)
    v4:
     - rename insn_bytes to insn_buf (Ingo)
     - add space after GPFSTR (Ingo)
     - make sizeof(desc) clearer (Ingo, Borislav)
     - also print the address (with a different message) if it's canonical (Ingo)
    v5:
     - reword comment on get_kernel_gp_address() (Sean)
     - make get_kernel_gp_address() also work on 32-bit (Sean)
     - minor nits (Sean)
     - more hedging for canonical GP (Sean)
     - let get_kernel_gp_address() return an enum (Sean)
     - rewrite commit message
    v6:
     - add comma after GPFSTR (Sean)
     - reorder variable declarations (Sean)
    
    I have already sent a patch to syzkaller that relaxes their parsing of GPF
    messages (https://github.com/google/syzkaller/commit/432c7650) such that
    changes like the one in this patch don't break it.
    That patch has already made its way into syzbot's syzkaller instances
    according to <https://syzkaller.appspot.com/upstream>.

 arch/x86/kernel/traps.c | 70 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f19de6f45d48..c8b4ae6aed5b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -56,6 +56,8 @@
 #include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -518,10 +520,55 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
 }
 
+enum kernel_gp_hint {
+	GP_NO_HINT,
+	GP_NON_CANONICAL,
+	GP_CANONICAL
+};
+
+/*
+ * When an uncaught #GP occurs, try to determine a memory address accessed by
+ * the instruction and return that address to the caller.
+ * Also try to figure out whether any part of the access to that address was
+ * non-canonical.
+ */
+static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
+						 unsigned long *addr)
+{
+	u8 insn_buf[MAX_INSN_SIZE];
+	struct insn insn;
+
+	if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+		return GP_NO_HINT;
+
+	kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+	insn_get_modrm(&insn);
+	insn_get_sib(&insn);
+	*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+
+	if (*addr == -1UL)
+		return GP_NO_HINT;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Check that:
+	 *  - the operand is not in the kernel half
+	 *  - the last byte of the operand is not in the user canonical half
+	 */
+	if (*addr < ~__VIRTUAL_MASK &&
+	    *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK)
+		return GP_NON_CANONICAL;
+#endif
+
+	return GP_CANONICAL;
+}
+
+#define GPFSTR "general protection fault"
+
 dotraplinkage void
 do_general_protection(struct pt_regs *regs, long error_code)
 {
-	const char *desc = "general protection fault";
+	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
 	struct task_struct *tsk;
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
@@ -540,6 +587,9 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	tsk = current;
 	if (!user_mode(regs)) {
+		enum kernel_gp_hint hint = GP_NO_HINT;
+		unsigned long gp_addr;
+
 		if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
 			return;
 
@@ -556,8 +606,22 @@ do_general_protection(struct pt_regs *regs, long error_code)
 			return;
 
 		if (notify_die(DIE_GPF, desc, regs, error_code,
-			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
-			die(desc, regs, error_code);
+			       X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
+			return;
+
+		if (error_code)
+			snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
+		else
+			hint = get_kernel_gp_address(regs, &gp_addr);
+
+		if (hint != GP_NO_HINT)
+			snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
+				 (hint == GP_NON_CANONICAL) ?
+				 "probably for non-canonical address" :
+				 "maybe for address",
+				 gp_addr);
+
+		die(desc, regs, error_code);
 		return;
 	}
 
-- 
2.24.0.393.g34dc348eaf-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v6 3/4] x86/dumpstack: Split out header line printing from __die()
  2019-12-09 14:31 [PATCH v6 1/4] x86/insn-eval: Add support for 64-bit kernel mode Jann Horn
  2019-12-09 14:31 ` [PATCH v6 2/4] x86/traps: Print address on #GP Jann Horn
@ 2019-12-09 14:31 ` Jann Horn
  2019-12-09 14:31 ` [PATCH v6 4/4] x86/kasan: Print original address on #GP Jann Horn
  2 siblings, 0 replies; 12+ messages in thread
From: Jann Horn @ 2019-12-09 14:31 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	kasan-dev, jannh
  Cc: linux-kernel, Andrey Konovalov, Andy Lutomirski, Sean Christopherson

Split __die() into __die_header() and __die_body(). This allows callers to
insert extra information below the header line that initiates the bug
report.

This can e.g. be used by __die() callers to allow KASAN to print additional
information below the header line of the bug report.

Signed-off-by: Jann Horn <jannh@google.com>
---

Notes:
    v3:
      new patch
    v4-v6:
      no changes

 arch/x86/include/asm/kdebug.h |  3 +++
 arch/x86/kernel/dumpstack.c   | 13 ++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 75f1e35e7c15..a0050fabce42 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -33,6 +33,9 @@ enum show_regs_mode {
 };
 
 extern void die(const char *, struct pt_regs *,long);
+extern void __die_header(const char *str, struct pt_regs *regs, long err);
+extern int __must_check __die_body(const char *str, struct pt_regs *regs,
+				   long err);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e07424e19274..6436f3f5f803 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 }
 NOKPROBE_SYMBOL(oops_end);
 
-int __die(const char *str, struct pt_regs *regs, long err)
+void __die_header(const char *str, struct pt_regs *regs, long err)
 {
 	const char *pr = "";
 
@@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
 	       IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
 	       IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
 	       (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
+}
+NOKPROBE_SYMBOL(__die_header);
 
+int __die_body(const char *str, struct pt_regs *regs, long err)
+{
 	show_regs(regs);
 	print_modules();
 
@@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
 
 	return 0;
 }
+NOKPROBE_SYMBOL(__die_body);
+
+int __die(const char *str, struct pt_regs *regs, long err)
+{
+	__die_header(str, regs, err);
+	return __die_body(str, regs, err);
+}
 NOKPROBE_SYMBOL(__die);
 
 /*
-- 
2.24.0.393.g34dc348eaf-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v6 4/4] x86/kasan: Print original address on #GP
  2019-12-09 14:31 [PATCH v6 1/4] x86/insn-eval: Add support for 64-bit kernel mode Jann Horn
  2019-12-09 14:31 ` [PATCH v6 2/4] x86/traps: Print address on #GP Jann Horn
  2019-12-09 14:31 ` [PATCH v6 3/4] x86/dumpstack: Split out header line printing from __die() Jann Horn
@ 2019-12-09 14:31 ` Jann Horn
  2019-12-11 17:37   ` Borislav Petkov
  2 siblings, 1 reply; 12+ messages in thread
From: Jann Horn @ 2019-12-09 14:31 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	kasan-dev, jannh
  Cc: linux-kernel, Andrey Konovalov, Andy Lutomirski, Sean Christopherson

Make #GP exceptions caused by out-of-bounds KASAN shadow accesses easier
to understand by computing the address of the original access and
printing that. More details are in the comments in the patch.

This turns an error like this:

    kasan: CONFIG_KASAN_INLINE enabled
    kasan: GPF could be caused by NULL-ptr deref or user memory access
    general protection fault, probably for non-canonical address
        0xe017577ddf75b7dd: 0000 [#1] PREEMPT SMP KASAN PTI

into this:

    general protection fault, probably for non-canonical address
        0xe017577ddf75b7dd: 0000 [#1] PREEMPT SMP KASAN PTI
    KASAN: maybe wild-memory-access in range
        [0x00badbeefbadbee8-0x00badbeefbadbeef]

The hook is placed in architecture-independent code, but is currently
only wired up to the X86 exception handler because I'm not sufficiently
familiar with the address space layout and exception handling mechanisms
on other architectures.

Signed-off-by: Jann Horn <jannh@google.com>
---

Notes:
    v2:
     - move to mm/kasan/report.c (Dmitry)
     - change hook name to be more generic
     - use TASK_SIZE instead of TASK_SIZE_MAX for compiling on non-x86
     - don't open-code KASAN_SHADOW_MASK (Dmitry)
     - add "KASAN: " prefix, but not "BUG: " (Andrey, Dmitry)
     - use same naming scheme as get_wild_bug_type (Andrey)
     - this version was "Reviewed-by: Dmitry Vyukov <dvyukov@google.com>"
    v3:
     - adjusted example output in commit message based on
       changes in preceding patch
     - ensure that KASAN output happens after bust_spinlocks(1)
     - moved hook in arch/x86/kernel/traps.c such that output
       appears after the first line of KASAN-independent error report
    v4:
     - adjust patch to changes in x86/traps patch
    v5:
     - adjust patch to changes in x86/traps patch
     - fix bug introduced in v3: remove die() call after oops_end()
    v6:
     - adjust sample output in commit message

 arch/x86/kernel/traps.c     | 12 ++++++++++-
 arch/x86/mm/kasan_init_64.c | 21 -------------------
 include/linux/kasan.h       |  6 ++++++
 mm/kasan/report.c           | 40 +++++++++++++++++++++++++++++++++++++
 4 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c8b4ae6aed5b..7813592b4fb3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -37,6 +37,7 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/kasan.h>
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
 #include <asm/debugreg.h>
@@ -589,6 +590,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	if (!user_mode(regs)) {
 		enum kernel_gp_hint hint = GP_NO_HINT;
 		unsigned long gp_addr;
+		unsigned long flags;
+		int sig;
 
 		if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
 			return;
@@ -621,7 +624,14 @@ do_general_protection(struct pt_regs *regs, long error_code)
 				 "maybe for address",
 				 gp_addr);
 
-		die(desc, regs, error_code);
+		flags = oops_begin();
+		sig = SIGSEGV;
+		__die_header(desc, regs, error_code);
+		if (hint == GP_NON_CANONICAL)
+			kasan_non_canonical_hook(gp_addr);
+		if (__die_body(desc, regs, error_code))
+			sig = 0;
+		oops_end(flags, regs, sig);
 		return;
 	}
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index cf5bc37c90ac..763e71abc0fe 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end)
 	} while (pgd++, addr = next, addr != (unsigned long)end);
 }
 
-#ifdef CONFIG_KASAN_INLINE
-static int kasan_die_handler(struct notifier_block *self,
-			     unsigned long val,
-			     void *data)
-{
-	if (val == DIE_GPF) {
-		pr_emerg("CONFIG_KASAN_INLINE enabled\n");
-		pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n");
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block kasan_die_notifier = {
-	.notifier_call = kasan_die_handler,
-};
-#endif
-
 void __init kasan_early_init(void)
 {
 	int i;
@@ -341,10 +324,6 @@ void __init kasan_init(void)
 	int i;
 	void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
 
-#ifdef CONFIG_KASAN_INLINE
-	register_die_notifier(&kasan_die_notifier);
-#endif
-
 	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
 
 	/*
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4f404c565db1..e0238af0388f 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -225,4 +225,10 @@ static inline void kasan_release_vmalloc(unsigned long start,
 					 unsigned long free_region_end) {}
 #endif
 
+#ifdef CONFIG_KASAN_INLINE
+void kasan_non_canonical_hook(unsigned long addr);
+#else /* CONFIG_KASAN_INLINE */
+static inline void kasan_non_canonical_hook(unsigned long addr) { }
+#endif /* CONFIG_KASAN_INLINE */
+
 #endif /* LINUX_KASAN_H */
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 621782100eaa..5ef9f24f566b 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -512,3 +512,43 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
 
 	end_report(&flags);
 }
+
+#ifdef CONFIG_KASAN_INLINE
+/*
+ * With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high
+ * canonical half of the address space) cause out-of-bounds shadow memory reads
+ * before the actual access. For addresses in the low canonical half of the
+ * address space, as well as most non-canonical addresses, that out-of-bounds
+ * shadow memory access lands in the non-canonical part of the address space.
+ * Help the user figure out what the original bogus pointer was.
+ */
+void kasan_non_canonical_hook(unsigned long addr)
+{
+	unsigned long orig_addr;
+	const char *bug_type;
+
+	if (addr < KASAN_SHADOW_OFFSET)
+		return;
+
+	orig_addr = (addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT;
+	/*
+	 * For faults near the shadow address for NULL, we can be fairly certain
+	 * that this is a KASAN shadow memory access.
+	 * For faults that correspond to shadow for low canonical addresses, we
+	 * can still be pretty sure - that shadow region is a fairly narrow
+	 * chunk of the non-canonical address space.
+	 * But faults that look like shadow for non-canonical addresses are a
+	 * really large chunk of the address space. In that case, we still
+	 * print the decoded address, but make it clear that this is not
+	 * necessarily what's actually going on.
+	 */
+	if (orig_addr < PAGE_SIZE)
+		bug_type = "null-ptr-deref";
+	else if (orig_addr < TASK_SIZE)
+		bug_type = "probably user-memory-access";
+	else
+		bug_type = "maybe wild-memory-access";
+	pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type,
+		 orig_addr, orig_addr + KASAN_SHADOW_MASK);
+}
+#endif
-- 
2.24.0.393.g34dc348eaf-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v6 2/4] x86/traps: Print address on #GP
  2019-12-09 14:31 ` [PATCH v6 2/4] x86/traps: Print address on #GP Jann Horn
@ 2019-12-11 17:06   ` Borislav Petkov
  2019-12-11 17:22     ` Andy Lutomirski
  2019-12-18 21:55     ` Jann Horn
  0 siblings, 2 replies; 12+ messages in thread
From: Borislav Petkov @ 2019-12-11 17:06 UTC (permalink / raw)
  To: Jann Horn
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, kasan-dev,
	linux-kernel, Andrey Konovalov, Andy Lutomirski,
	Sean Christopherson

On Mon, Dec 09, 2019 at 03:31:18PM +0100, Jann Horn wrote:
>     I have already sent a patch to syzkaller that relaxes their parsing of GPF
>     messages (https://github.com/google/syzkaller/commit/432c7650) such that
>     changes like the one in this patch don't break it.
>     That patch has already made its way into syzbot's syzkaller instances
>     according to <https://syzkaller.appspot.com/upstream>.

Ok, cool.

I still think we should do the oops number marking, though, as it has
more benefits than just syzkaller scanning for it. The first oops has always
been of crucial importance so having the number in there:

[    2.542218] [1] general protection fault while derefing a non-canonical address 0xdfff000000000001: 0000 [#1] PREEMPT SMP
	 	^

would make eyeballing oopses even easier. Basically the same reason why
you're doing this enhancement. :)

So let me know if you don't have time to do it or you don't care about
it etc, and I'll have a look. Independent of those patches, of course -
those look good so far.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v6 2/4] x86/traps: Print address on #GP
  2019-12-11 17:06   ` Borislav Petkov
@ 2019-12-11 17:22     ` Andy Lutomirski
  2019-12-11 17:29       ` Borislav Petkov
  2019-12-18 21:55     ` Jann Horn
  1 sibling, 1 reply; 12+ messages in thread
From: Andy Lutomirski @ 2019-12-11 17:22 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Jann Horn, Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, kasan-dev,
	linux-kernel, Andrey Konovalov, Andy Lutomirski,
	Sean Christopherson



> On Dec 11, 2019, at 9:06 AM, Borislav Petkov <bp@alien8.de> wrote:
> 
> On Mon, Dec 09, 2019 at 03:31:18PM +0100, Jann Horn wrote:
>>    I have already sent a patch to syzkaller that relaxes their parsing of GPF
>>    messages (https://github.com/google/syzkaller/commit/432c7650) such that
>>    changes like the one in this patch don't break it.
>>    That patch has already made its way into syzbot's syzkaller instances
>>    according to <https://syzkaller.appspot.com/upstream>.
> 
> Ok, cool.
> 
> I still think we should do the oops number marking, though, as it has
> more benefits than just syzkaller scanning for it. The first oops has always
> been of crucial importance so having the number in there:
> 
> [    2.542218] [1] general protection fault while derefing a non-canonical address 0xdfff000000000001: 0000 [#1] PREEMPT SMP
>        ^
> 
> would make eyeballing oopses even easier. Basically the same reason why
> you're doing this enhancement. :)
> 

Could we spare a few extra bytes to make this more readable?  I can never keep track of which number is the oops count, which is the cpu, and which is the error code.  How about:

OOPS 1: general protection blah blah blah (CPU 0)

and put in the next couple lines “#GP(0)”.

> So let me know if you don't have time to do it or you don't care about
> it etc, and I'll have a look. Independent of those patches, of course -
> those look good so far.
> 
> Thx.
> 
> -- 
> Regards/Gruss,
>    Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v6 2/4] x86/traps: Print address on #GP
  2019-12-11 17:22     ` Andy Lutomirski
@ 2019-12-11 17:29       ` Borislav Petkov
  2019-12-11 18:17         ` Andy Lutomirski
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2019-12-11 17:29 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Jann Horn, Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, kasan-dev,
	linux-kernel, Andrey Konovalov, Andy Lutomirski,
	Sean Christopherson

On Wed, Dec 11, 2019 at 09:22:30AM -0800, Andy Lutomirski wrote:
> Could we spare a few extra bytes to make this more readable?  I can never keep track of which number is the oops count, which is the cpu, and which is the error code.  How about:
> 
> OOPS 1: general protection blah blah blah (CPU 0)
> 
> and put in the next couple lines “#GP(0)”.

Well, right now it is:

[    2.470492] general protection fault, probably for non-canonical address 0xdfff000000000001: 0000 [#1] PREEMPT SMP
[    2.471615] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc1+ #6

and the CPU is on the second line, the error code is before the number -
[#1] - in that case.

If we pull the number in front, we can do:

[    2.470492] [#1] general protection fault, probably for non-canonical address 0xdfff000000000001: 0000 PREEMPT SMP
[    2.471615] [#1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc1+ #6

and this way you know that the error code is there, after the first
line's description.

I guess we can do:

[    2.470492] [#1] general protection fault, probably for non-canonical address 0xdfff000000000001 Error Code: 0000 PREEMPT SMP

to make it even more explicit...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v6 4/4] x86/kasan: Print original address on #GP
  2019-12-09 14:31 ` [PATCH v6 4/4] x86/kasan: Print original address on #GP Jann Horn
@ 2019-12-11 17:37   ` Borislav Petkov
  2019-12-18 22:33     ` Jann Horn
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2019-12-11 17:37 UTC (permalink / raw)
  To: Jann Horn
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, kasan-dev,
	linux-kernel, Andrey Konovalov, Andy Lutomirski,
	Sean Christopherson

On Mon, Dec 09, 2019 at 03:31:20PM +0100, Jann Horn wrote:
>  arch/x86/kernel/traps.c     | 12 ++++++++++-
>  arch/x86/mm/kasan_init_64.c | 21 -------------------
>  include/linux/kasan.h       |  6 ++++++
>  mm/kasan/report.c           | 40 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 57 insertions(+), 22 deletions(-)

I need a KASAN person ACK here, I'd guess.

> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index c8b4ae6aed5b..7813592b4fb3 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -37,6 +37,7 @@
>  #include <linux/mm.h>
>  #include <linux/smp.h>
>  #include <linux/io.h>
> +#include <linux/kasan.h>
>  #include <asm/stacktrace.h>
>  #include <asm/processor.h>
>  #include <asm/debugreg.h>
> @@ -589,6 +590,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
>  	if (!user_mode(regs)) {
>  		enum kernel_gp_hint hint = GP_NO_HINT;
>  		unsigned long gp_addr;
> +		unsigned long flags;
> +		int sig;
>  
>  		if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
>  			return;
> @@ -621,7 +624,14 @@ do_general_protection(struct pt_regs *regs, long error_code)
>  				 "maybe for address",
>  				 gp_addr);
>  
> -		die(desc, regs, error_code);
> +		flags = oops_begin();
> +		sig = SIGSEGV;
> +		__die_header(desc, regs, error_code);
> +		if (hint == GP_NON_CANONICAL)
> +			kasan_non_canonical_hook(gp_addr);
> +		if (__die_body(desc, regs, error_code))
> +			sig = 0;
> +		oops_end(flags, regs, sig);

Instead of opencoding it like this, can we add a

	die_addr(desc, regs, error_code, gp_addr);

to arch/x86/kernel/dumpstack.c and call it from here:

	if (hint != GP_NON_CANONICAL)
		gp_addr = 0;

	die_addr(desc, regs, error_code, gp_addr);

This way you won't need to pass down to die_addr() the hint too - you
code into gp_addr whether it was non-canonical or not.

The

+       if (addr < KASAN_SHADOW_OFFSET)
+               return;

check in kasan_non_canonical_hook() would then catch it when addr == 0.

Hmmm?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v6 2/4] x86/traps: Print address on #GP
  2019-12-11 17:29       ` Borislav Petkov
@ 2019-12-11 18:17         ` Andy Lutomirski
  2019-12-19 11:29           ` Borislav Petkov
  0 siblings, 1 reply; 12+ messages in thread
From: Andy Lutomirski @ 2019-12-11 18:17 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Jann Horn, Thomas Gleixner, Ingo Molnar, H. Peter Anvin, X86 ML,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, kasan-dev,
	LKML, Andrey Konovalov, Andy Lutomirski, Sean Christopherson

On Wed, Dec 11, 2019 at 9:29 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Wed, Dec 11, 2019 at 09:22:30AM -0800, Andy Lutomirski wrote:
> > Could we spare a few extra bytes to make this more readable?  I can never keep track of which number is the oops count, which is the cpu, and which is the error code.  How about:
> >
> > OOPS 1: general protection blah blah blah (CPU 0)
> >
> > and put in the next couple lines “#GP(0)”.
>
> Well, right now it is:
>
> [    2.470492] general protection fault, probably for non-canonical address 0xdfff000000000001: 0000 [#1] PREEMPT SMP
> [    2.471615] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc1+ #6
>
> and the CPU is on the second line, the error code is before the number -
> [#1] - in that case.
>
> If we pull the number in front, we can do:
>
> [    2.470492] [#1] general protection fault, probably for non-canonical address 0xdfff000000000001: 0000 PREEMPT SMP
> [    2.471615] [#1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc1+ #6
>
> and this way you know that the error code is there, after the first
> line's description.

Hmm, I like that.

>
> I guess we can do:
>
> [    2.470492] [#1] general protection fault, probably for non-canonical address 0xdfff000000000001 Error Code: 0000 PREEMPT SMP
>
> to make it even more explicit...

I like this too.

>
> --
> Regards/Gruss,
>     Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v6 2/4] x86/traps: Print address on #GP
  2019-12-11 17:06   ` Borislav Petkov
  2019-12-11 17:22     ` Andy Lutomirski
@ 2019-12-18 21:55     ` Jann Horn
  1 sibling, 0 replies; 12+ messages in thread
From: Jann Horn @ 2019-12-18 21:55 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Dmitry Vyukov, kasan-dev, kernel list, Andrey Konovalov,
	Andy Lutomirski, Sean Christopherson

On Wed, Dec 11, 2019 at 6:06 PM Borislav Petkov <bp@alien8.de> wrote:
> On Mon, Dec 09, 2019 at 03:31:18PM +0100, Jann Horn wrote:
> >     I have already sent a patch to syzkaller that relaxes their parsing of GPF
> >     messages (https://github.com/google/syzkaller/commit/432c7650) such that
> >     changes like the one in this patch don't break it.
> >     That patch has already made its way into syzbot's syzkaller instances
> >     according to <https://syzkaller.appspot.com/upstream>.
>
> Ok, cool.
>
> I still think we should do the oops number marking, though, as it has
> more benefits than just syzkaller scanning for it. The first oops has always
> been of crucial importance so having the number in there:
>
> [    2.542218] [1] general protection fault while derefing a non-canonical address 0xdfff000000000001: 0000 [#1] PREEMPT SMP
>                 ^
>
> would make eyeballing oopses even easier. Basically the same reason why
> you're doing this enhancement. :)
>
> So let me know if you don't have time to do it or you don't care about
> it etc, and I'll have a look.

I don't think I have time to do this in the near future. Feel free to
implement this.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v6 4/4] x86/kasan: Print original address on #GP
  2019-12-11 17:37   ` Borislav Petkov
@ 2019-12-18 22:33     ` Jann Horn
  0 siblings, 0 replies; 12+ messages in thread
From: Jann Horn @ 2019-12-18 22:33 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Dmitry Vyukov, kasan-dev, kernel list, Andrey Konovalov,
	Andy Lutomirski, Sean Christopherson

On Wed, Dec 11, 2019 at 6:37 PM Borislav Petkov <bp@alien8.de> wrote:
> On Mon, Dec 09, 2019 at 03:31:20PM +0100, Jann Horn wrote:
> >  arch/x86/kernel/traps.c     | 12 ++++++++++-
> >  arch/x86/mm/kasan_init_64.c | 21 -------------------
> >  include/linux/kasan.h       |  6 ++++++
> >  mm/kasan/report.c           | 40 +++++++++++++++++++++++++++++++++++++
> >  4 files changed, 57 insertions(+), 22 deletions(-)
>
> I need a KASAN person ACK here, I'd guess.

Right, I got a Reviewed-by from Dmitry for v2, but cleared that when I
made changes to the patch later - I'll ask Dmitry for a fresh ack on
the v7 patch.

[...]
> > -             die(desc, regs, error_code);
> > +             flags = oops_begin();
> > +             sig = SIGSEGV;
> > +             __die_header(desc, regs, error_code);
> > +             if (hint == GP_NON_CANONICAL)
> > +                     kasan_non_canonical_hook(gp_addr);
> > +             if (__die_body(desc, regs, error_code))
> > +                     sig = 0;
> > +             oops_end(flags, regs, sig);
>
> Instead of opencoding it like this, can we add a
>
>         die_addr(desc, regs, error_code, gp_addr);
>
> to arch/x86/kernel/dumpstack.c and call it from here:
>
>         if (hint != GP_NON_CANONICAL)
>                 gp_addr = 0;
>
>         die_addr(desc, regs, error_code, gp_addr);

Okay, so I'll make __die_header() and __die_body() static, introduce
and hook up die_addr() in patch 3/4, and then in patch 4/4 insert the
call to the KASAN hook.

> This way you won't need to pass down to die_addr() the hint too - you
> code into gp_addr whether it was non-canonical or not.
>
> The
>
> +       if (addr < KASAN_SHADOW_OFFSET)
> +               return;
>
> check in kasan_non_canonical_hook() would then catch it when addr == 0.

I'll add an explicit check for nonzero address before calling
kasan_non_canonical_hook() so that the semantics are a bit more
cleanly split.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v6 2/4] x86/traps: Print address on #GP
  2019-12-11 18:17         ` Andy Lutomirski
@ 2019-12-19 11:29           ` Borislav Petkov
  0 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2019-12-19 11:29 UTC (permalink / raw)
  To: Andy Lutomirski, Linus Torvalds
  Cc: Jann Horn, Thomas Gleixner, Ingo Molnar, H. Peter Anvin, X86 ML,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, kasan-dev,
	LKML, Andrey Konovalov, Andy Lutomirski, Sean Christopherson

On Wed, Dec 11, 2019 at 10:17:25AM -0800, Andy Lutomirski wrote:
> On Wed, Dec 11, 2019 at 9:29 AM Borislav Petkov <bp@alien8.de> wrote:
> >
> > On Wed, Dec 11, 2019 at 09:22:30AM -0800, Andy Lutomirski wrote:
> > > Could we spare a few extra bytes to make this more readable?  I can never keep track of which number is the oops count, which is the cpu, and which is the error code.  How about:
> > >
> > > OOPS 1: general protection blah blah blah (CPU 0)
> > >
> > > and put in the next couple lines “#GP(0)”.
> >
> > Well, right now it is:
> >
> > [    2.470492] general protection fault, probably for non-canonical address 0xdfff000000000001: 0000 [#1] PREEMPT SMP
> > [    2.471615] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc1+ #6
> >
> > and the CPU is on the second line, the error code is before the number -
> > [#1] - in that case.
> >
> > If we pull the number in front, we can do:
> >
> > [    2.470492] [#1] general protection fault, probably for non-canonical address 0xdfff000000000001: 0000 PREEMPT SMP
> > [    2.471615] [#1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc1+ #6
> >
> > and this way you know that the error code is there, after the first
> > line's description.
> 
> Hmm, I like that.
> 
> >
> > I guess we can do:
> >
> > [    2.470492] [#1] general protection fault, probably for non-canonical address 0xdfff000000000001 Error Code: 0000 PREEMPT SMP
> >
> > to make it even more explicit...
> 
> I like this too.

Ok, let me add Linus too because I'm sure he would have an opinion here.

@Linus, the idea is to dump the die_counter in front of the oops for two
reasons:

1. It always has been absolutely important to know which the first oops
is.

2. Fuzzing and all those other tools scanning dmesg would not need to
make any adjustments anymore to their grepping regexes because oops
lines would be marked uniquely now.

Here's a first attempt, what do you guys think?

WIP diff follows too.

...
[    3.207442] Freeing unused kernel image (text/rodata gap) memory: 2040K
[    3.209464] Freeing unused kernel image (rodata/data gap) memory: 168K
[    3.221088] x86/mm: Checked W+X mappings: passed, no W+X pages found.
[    3.221885] [0] general protection fault: 0000  PREEMPT SMP
[    3.222590] [0] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc2+ #16
[    3.223388] [0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-1 04/01/2014
[    3.224374] [0] RIP: 0010:kernel_init+0x58/0x107
[    3.224727] [0] Code: 48 c7 c7 c8 74 ea 81 e8 4b 47 8f ff c7 05 c7 b3 95 00 02 00 00 00 e8 4e cd a0 ff e8 b9 2d 90 ff 48 b8 00 00 00 00 00 00 ff df <ff> e0 48 8b 3d 4e 74 d5 00 48 85 ff 74 22 e8 1b f3 82 ff 85 c0 89
[    3.224727] [0] RSP: 0018:ffffc90000013f50 EFLAGS: 00010246[0] 
[    3.224727] [0] RAX: dfff000000000000 RBX: ffffffff817d1b79 RCX: 0000000080aa00a9
[    3.224727] [0] RDX: 0000000080aa00aa RSI: 0000000000000001 RDI: ffff88807d406f00
[    3.224727] [0] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
[    3.224727] [0] R10: 0000000000000001 R11: ffff88807d526d80 R12: 0000000000000000
[    3.224727] [0] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[    3.224727] [0] FS:  0000000000000000(0000) GS:ffff88807da40000(0000) knlGS:0000000000000000
[    3.224727] [0] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.224727] [0] CR2: 0000000000000000 CR3: 0000000002009000 CR4: 00000000003406e0
[    3.224727] [0] Call Trace:
[    3.224727] [0] ret_from_fork+0x22/0x40
[    3.224727] [0] Modules linked in:
[    3.236790] ---[ end trace ef40186b3f9be0f1 ]---
[    3.237430] [0] RIP: 0010:kernel_init+0x58/0x107
[    3.238083] [0] Code: 48 c7 c7 c8 74 ea 81 e8 4b 47 8f ff c7 05 c7 b3 95 00 02 00 00 00 e8 4e cd a0 ff e8 b9 2d 90 ff 48 b8 00 00 00 00 00 00 ff df <ff> e0 48 8b 3d 4e 74 d5 00 48 85 ff 74 22 e8 1b f3 82 ff 85 c0 89
[    3.240176] [0] RSP: 0018:ffffc90000013f50 EFLAGS: 00010246[0] 
[    3.240950] [0] RAX: dfff000000000000 RBX: ffffffff817d1b79 RCX: 0000000080aa00a9
[    3.242486] [0] RDX: 0000000080aa00aa RSI: 0000000000000001 RDI: ffff88807d406f00
[    3.243389] [0] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
[    3.244283] [0] R10: 0000000000000001 R11: ffff88807d526d80 R12: 0000000000000000
[    3.245190] [0] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[    3.246056] [0] FS:  0000000000000000(0000) GS:ffff88807da40000(0000) knlGS:0000000000000000
[    3.246993] [0] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.247750] [0] CR2: 0000000000000000 CR3: 0000000002009000 CR4: 00000000003406e0
[    3.248642] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[    3.249180] Kernel Offset: disabled
[    3.249180] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---


---
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 75f1e35e7c15..952f0d786bbf 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -35,7 +35,7 @@ enum show_regs_mode {
 extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
-extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
+extern void __show_regs(struct pt_regs *regs, enum show_regs_mode, unsigned int die_counter);
 extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 14db05086bbf..4634852f0536 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -86,9 +86,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
 	return (unsigned long *)task->thread.sp;
 }
 
-void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			unsigned long *stack, char *log_lvl);
-
 /* The form of the top of the frame on the stack */
 struct stack_frame {
 	struct stack_frame *next_frame;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e07424e19274..0058a02d6c54 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -25,7 +25,7 @@
 
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
-static int die_counter;
+static unsigned int die_counter;
 
 static struct pt_regs exec_summary_regs;
 
@@ -68,7 +68,8 @@ static void printk_stack_address(unsigned long address, int reliable,
 				 char *log_lvl)
 {
 	touch_nmi_watchdog();
-	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
+	printk("%s[%d] %s%pB\n", log_lvl, die_counter, reliable ? "" : "? ",
+	       (void *)address);
 }
 
 /*
@@ -108,10 +109,10 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
 
 	if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue,
 					OPCODE_BUFSIZE)) {
-		printk("%sCode: Bad RIP value.\n", loglvl);
+		printk("%s[%d] Code: Bad RIP value.\n", loglvl, die_counter);
 	} else {
-		printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
-		       __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,
+		printk("%s[%d] Code: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
+		       __stringify(EPILOGUE_SIZE) "ph\n", loglvl, die_counter, opcodes,
 		       opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1);
 	}
 }
@@ -119,9 +120,9 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
 void show_ip(struct pt_regs *regs, const char *loglvl)
 {
 #ifdef CONFIG_X86_32
-	printk("%sEIP: %pS\n", loglvl, (void *)regs->ip);
+	printk("%s[%d] EIP: %pS\n", loglvl, die_counter, (void *)regs->ip);
 #else
-	printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
+	printk("%s[%d] RIP: %04x:%pS\n", loglvl, die_counter, (int)regs->cs, (void *)regs->ip);
 #endif
 	show_opcodes(regs, loglvl);
 }
@@ -129,8 +130,8 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
 void show_iret_regs(struct pt_regs *regs)
 {
 	show_ip(regs, KERN_DEFAULT);
-	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
-		regs->sp, regs->flags);
+	printk(KERN_DEFAULT "[%d] RSP: %04x:%016lx EFLAGS: %08lx",
+		die_counter, (int)regs->ss, regs->sp, regs->flags);
 }
 
 static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
@@ -146,7 +147,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
 	 * they can be printed in the right context.
 	 */
 	if (!partial && on_stack(info, regs, sizeof(*regs))) {
-		__show_regs(regs, SHOW_REGS_SHORT);
+		__show_regs(regs, SHOW_REGS_SHORT, die_counter);
 
 	} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
 				       IRET_FRAME_SIZE)) {
@@ -159,8 +160,8 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
 	}
 }
 
-void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			unsigned long *stack, char *log_lvl)
+static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			       unsigned long *stack, char *log_lvl)
 {
 	struct unwind_state state;
 	struct stack_info stack_info = {0};
@@ -168,7 +169,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	int graph_idx = 0;
 	bool partial = false;
 
-	printk("%sCall Trace:\n", log_lvl);
+	printk("%s[%d] Call Trace:\n", log_lvl, die_counter);
 
 	unwind_start(&state, task, regs, stack);
 	stack = stack ? : get_stack_pointer(task, regs);
@@ -207,7 +208,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 		stack_name = stack_type_name(stack_info.type);
 		if (stack_name)
-			printk("%s <%s>\n", log_lvl, stack_name);
+			printk("%s[%d] <%s>\n", log_lvl, die_counter, stack_name);
 
 		if (regs)
 			show_regs_if_on_stack(&stack_info, regs, partial);
@@ -275,7 +276,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		}
 
 		if (stack_name)
-			printk("%s </%s>\n", log_lvl, stack_name);
+			printk("%s[%d] </%s>\n", log_lvl, die_counter, stack_name);
 	}
 }
 
@@ -344,7 +345,9 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	oops_exit();
 
 	/* Executive summary in case the oops scrolled away */
-	__show_regs(&exec_summary_regs, SHOW_REGS_ALL);
+	__show_regs(&exec_summary_regs, SHOW_REGS_ALL, die_counter);
+
+	die_counter++;
 
 	if (!signr)
 		return;
@@ -368,6 +371,7 @@ NOKPROBE_SYMBOL(oops_end);
 int __die(const char *str, struct pt_regs *regs, long err)
 {
 	const char *pr = "";
+	char pfx[5] = { };
 
 	/* Save the regs of the first oops for the executive summary later. */
 	if (!die_counter)
@@ -377,7 +381,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
 		pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
 
 	printk(KERN_DEFAULT
-	       "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+	       "[%d] %s: %04lx %s%s%s%s%s\n", die_counter, str, err & 0xffff,
 	       pr,
 	       IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
 	       debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
@@ -385,8 +389,10 @@ int __die(const char *str, struct pt_regs *regs, long err)
 	       IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
 	       (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
 
+	snprintf(pfx, sizeof(pfx), "[%d] ", die_counter);
+
 	show_regs(regs);
-	print_modules();
+	print_modules(pfx);
 
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
@@ -412,9 +418,13 @@ void die(const char *str, struct pt_regs *regs, long err)
 
 void show_regs(struct pt_regs *regs)
 {
-	show_regs_print_info(KERN_DEFAULT);
+	char prf[5] = { };
+
+	snprintf(prf, sizeof(prf), "%s[%d] ", KERN_DEFAULT, die_counter);
+
+	show_regs_print_info(prf);
 
-	__show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL);
+	__show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL, die_counter);
 
 	/*
 	 * When in-kernel, we also print out the stack at the time of the fault..
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 506d66830d4d..83422efb5a4a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -64,7 +64,8 @@
 #include "process.h"
 
 /* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
+		 unsigned int die_counter)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 	unsigned long d0, d1, d2, d3, d6, d7;
@@ -74,20 +75,20 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 	show_iret_regs(regs);
 
 	if (regs->orig_ax != -1)
-		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+		pr_cont("[%d] ORIG_RAX: %016lx\n", die_counter, regs->orig_ax);
 	else
-		pr_cont("\n");
-
-	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
-	       regs->ax, regs->bx, regs->cx);
-	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
-	       regs->dx, regs->si, regs->di);
-	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
-	       regs->bp, regs->r8, regs->r9);
-	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
-	       regs->r10, regs->r11, regs->r12);
-	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
-	       regs->r13, regs->r14, regs->r15);
+		pr_cont("[%d] \n", die_counter);
+
+	printk(KERN_DEFAULT "[%d] RAX: %016lx RBX: %016lx RCX: %016lx\n",
+	       die_counter, regs->ax, regs->bx, regs->cx);
+	printk(KERN_DEFAULT "[%d] RDX: %016lx RSI: %016lx RDI: %016lx\n",
+	       die_counter, regs->dx, regs->si, regs->di);
+	printk(KERN_DEFAULT "[%d] RBP: %016lx R08: %016lx R09: %016lx\n",
+	       die_counter,regs->bp, regs->r8, regs->r9);
+	printk(KERN_DEFAULT "[%d] R10: %016lx R11: %016lx R12: %016lx\n",
+	       die_counter, regs->r10, regs->r11, regs->r12);
+	printk(KERN_DEFAULT "[%d] R13: %016lx R14: %016lx R15: %016lx\n",
+	       die_counter, regs->r13, regs->r14, regs->r15);
 
 	if (mode == SHOW_REGS_SHORT)
 		return;
@@ -95,8 +96,8 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 	if (mode == SHOW_REGS_USER) {
 		rdmsrl(MSR_FS_BASE, fs);
 		rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
-		printk(KERN_DEFAULT "FS:  %016lx GS:  %016lx\n",
-		       fs, shadowgs);
+		printk(KERN_DEFAULT "[%d] FS:  %016lx GS:  %016lx\n",
+		       die_counter, fs, shadowgs);
 		return;
 	}
 
@@ -114,12 +115,12 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 	cr3 = __read_cr3();
 	cr4 = __read_cr4();
 
-	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
-	       fs, fsindex, gs, gsindex, shadowgs);
-	printk(KERN_DEFAULT "CS:  %04lx DS: %04x ES: %04x CR0: %016lx\n", regs->cs, ds,
-			es, cr0);
-	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
-			cr4);
+	printk(KERN_DEFAULT "[%d] FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+	       die_counter, fs, fsindex, gs, gsindex, shadowgs);
+	printk(KERN_DEFAULT "[%d] CS:  %04lx DS: %04x ES: %04x CR0: %016lx\n",
+	       die_counter, regs->cs, ds, es, cr0);
+	printk(KERN_DEFAULT "[%d] CR2: %016lx CR3: %016lx CR4: %016lx\n",
+	       die_counter, cr2, cr3, cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
@@ -131,14 +132,14 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 	/* Only print out debug registers if they are in their non-default state. */
 	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
 	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
-		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
-		       d0, d1, d2);
-		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
-		       d3, d6, d7);
+		printk(KERN_DEFAULT "[%d] DR0: %016lx DR1: %016lx DR2: %016lx\n",
+		       die_counter, d0, d1, d2);
+		printk(KERN_DEFAULT "[%d] DR3: %016lx DR6: %016lx DR7: %016lx\n",
+		       die_counter, d3, d6, d7);
 	}
 
 	if (boot_cpu_has(X86_FEATURE_OSPKE))
-		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
+		printk(KERN_DEFAULT "[%d] PKRU: %08x\n", die_counter, read_pkru());
 }
 
 void release_thread(struct task_struct *dead_task)
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
index 903ad91b624f..62f0ecc2643f 100644
--- a/arch/x86/um/sysrq_64.c
+++ b/arch/x86/um/sysrq_64.c
@@ -16,7 +16,7 @@
 void show_regs(struct pt_regs *regs)
 {
 	printk("\n");
-	print_modules();
+	print_modules("");
 	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
 		current->comm, print_tainted(), init_utsname()->release);
 	printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
diff --git a/include/linux/module.h b/include/linux/module.h
index 0c7366c317bd..e83a467e7b9c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -665,7 +665,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned
 int register_module_notifier(struct notifier_block *nb);
 int unregister_module_notifier(struct notifier_block *nb);
 
-extern void print_modules(void);
+extern void print_modules(const char *pfx);
 
 static inline bool module_requested_async_probing(struct module *module)
 {
@@ -809,7 +809,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb)
 
 #define module_put_and_exit(code) do_exit(code)
 
-static inline void print_modules(void)
+static inline void print_modules(const char *pfx)
 {
 }
 
diff --git a/init/main.c b/init/main.c
index f9d9701d600c..890208fa7430 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1127,6 +1127,9 @@ static int __ref kernel_init(void *unused)
 
 	rcu_end_inkernel_boot();
 
+	asm volatile("mov $0xdfff000000000000, %rax\n\t"
+		     "jmpq *%rax\n\t");
+
 	if (ramdisk_execute_command) {
 		ret = run_init_process(ramdisk_execute_command);
 		if (!ret)
diff --git a/kernel/module.c b/kernel/module.c
index ac058a5ad1d1..9d73718ce30b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4476,12 +4476,12 @@ struct module *__module_text_address(unsigned long addr)
 EXPORT_SYMBOL_GPL(__module_text_address);
 
 /* Don't grab lock, we're oopsing. */
-void print_modules(void)
+void print_modules(const char *pfx)
 {
 	struct module *mod;
 	char buf[MODULE_FLAGS_BUF_SIZE];
 
-	printk(KERN_DEFAULT "Modules linked in:");
+	printk(KERN_DEFAULT "%sModules linked in:", pfx);
 	/* Most callers should already have preempt disabled, but make sure */
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
diff --git a/kernel/panic.c b/kernel/panic.c
index b69ee9e76cb2..056b6448fec0 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -582,7 +582,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 		panic("panic_on_warn set ...\n");
 	}
 
-	print_modules();
+	print_modules("");
 
 	if (regs)
 		show_regs(regs);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 15508c202bf5..fda7ce233344 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3853,7 +3853,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
 		prev->comm, prev->pid, preempt_count());
 
 	debug_show_held_locks(prev);
-	print_modules();
+	print_modules("");
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
 	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f41334ef0971..597ce03f9f2c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -448,7 +448,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
 		__this_cpu_write(softlockup_task_ptr_saved, current);
-		print_modules();
+		print_modules("");
 		print_irqtrace_events(current);
 		if (regs)
 			show_regs(regs);
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 247bf0b1582c..76aa96a01b10 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -137,7 +137,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
 
 		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
 			 this_cpu);
-		print_modules();
+		print_modules("");
 		print_irqtrace_events(current);
 		if (regs)
 			show_regs(regs);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4785a8a2040e..f71d639e0032 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -645,7 +645,7 @@ static void bad_page(struct page *page, const char *reason,
 						bad_flags, &bad_flags);
 	dump_page_owner(page);
 
-	print_modules();
+	print_modules("");
 	dump_stack();
 out:
 	/* Leave bad fields for debug, except PageBuddy could make trouble */

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-12-19 11:29 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-09 14:31 [PATCH v6 1/4] x86/insn-eval: Add support for 64-bit kernel mode Jann Horn
2019-12-09 14:31 ` [PATCH v6 2/4] x86/traps: Print address on #GP Jann Horn
2019-12-11 17:06   ` Borislav Petkov
2019-12-11 17:22     ` Andy Lutomirski
2019-12-11 17:29       ` Borislav Petkov
2019-12-11 18:17         ` Andy Lutomirski
2019-12-19 11:29           ` Borislav Petkov
2019-12-18 21:55     ` Jann Horn
2019-12-09 14:31 ` [PATCH v6 3/4] x86/dumpstack: Split out header line printing from __die() Jann Horn
2019-12-09 14:31 ` [PATCH v6 4/4] x86/kasan: Print original address on #GP Jann Horn
2019-12-11 17:37   ` Borislav Petkov
2019-12-18 22:33     ` Jann Horn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).