All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: Implement __WARN using UD0
@ 2017-02-23 13:28 Peter Zijlstra
  2017-02-23 14:09 ` Peter Zijlstra
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-23 13:28 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, torvalds, arjan, bp, jpoimboe, richard.weinberger


By using "UD0" for WARNs we remove the function call and its possible
__FILE__ and __LINE__ immediate arguments from the instruction stream.

Total image size will not change much, what we win in the instruction
stream we'll loose because of the __bug_table entries. Still, saves on
I$ footprint and the total image size does go down a bit.

  text            data     bss    dec              hex    filename                     size

  10503189        4442584  843776 15789549         f0eded defconfig-build/vmlinux.pre  25242904
  10483798        4442584  843776 15770158         f0a22e defconfig-build/vmlinux.post 25243504

(um didn't seem to use GENERIC_BUG at all, so remove it)

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Richard Weinberger <richard.weinberger@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/um/Kconfig.common         |    5 ---
 arch/x86/include/asm/bug.h     |   62 +++++++++++++++++++++++++++++++++--------
 arch/x86/kernel/dumpstack.c    |    3 -
 arch/x86/kernel/dumpstack_32.c |   12 -------
 arch/x86/kernel/dumpstack_64.c |   10 ------
 arch/x86/kernel/traps.c        |   50 +++++++++++++++++++++++++++++----
 arch/x86/um/Makefile           |    2 -
 arch/x86/um/bug.c              |   21 -------------
 8 files changed, 95 insertions(+), 70 deletions(-)

--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -50,11 +50,6 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
-config GENERIC_BUG
-	bool
-	default y
-	depends on BUG
-
 config HZ
 	int
 	default 100
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -1,36 +1,74 @@
 #ifndef _ASM_X86_BUG_H
 #define _ASM_X86_BUG_H
 
+#include <linux/stringify.h>
+
 #define HAVE_ARCH_BUG
 
-#ifdef CONFIG_DEBUG_BUGVERBOSE
+/*
+ * Only UD2 is defined in the Intel SDM and AMD64 docs,
+ * but the interweb provided UD0 and UD1:
+ *
+ *   https://groups.google.com/forum/#!topic/alt.os.assembly/_rS4L0fnqGE
+ *
+ * Since some emulators terminate on UD2, we cannot use it for WARN.
+ * Since various instruction decoders disagree on the length of UD1,
+ * we cannot use it either. So use UD0 for WARN.
+ *
+ * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
+ *  our kernel decoder thinks it takes a ModRM byte, which seems consistent
+ *  with various things like the Intel SDM instruction encoding rules)
+ */
+
+#define ASM_UD0		".byte 0x0f, 0xff"
+#define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */
+#define ASM_UD2		".byte 0x0f, 0x0b"
 
 #ifdef CONFIG_X86_32
-# define __BUG_C0	"2:\t.long 1b, %c0\n"
+# define __BUG_REL(val)	"\t.long " __stringify(val) "\n"
 #else
-# define __BUG_C0	"2:\t.long 1b - 2b, %c0 - 2b\n"
+# define __BUG_REL(val)	"\t.long " __stringify(val) " - 2b\n"
 #endif
 
-#define BUG()							\
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define _BUG_FLAGS(ins, flags)					\
 do {								\
-	asm volatile("1:\tud2\n"				\
+	asm volatile("1:\t" ins "\n"				\
 		     ".pushsection __bug_table,\"a\"\n"		\
-		     __BUG_C0					\
-		     "\t.word %c1, 0\n"				\
-		     "\t.org 2b+%c2\n"				\
+		     "2:" __BUG_REL(1b)				\
+			  __BUG_REL(%c0)			\
+		     "\t.word %c1, %c2\n"			\
+		     "\t.org 2b+%c3\n"				\
 		     ".popsection"				\
 		     : : "i" (__FILE__), "i" (__LINE__),	\
-		     "i" (sizeof(struct bug_entry)));		\
-	unreachable();						\
+			 "i" (flags),				\
+			 "i" (sizeof(struct bug_entry)));	\
 } while (0)
 
 #else
+
+#define _BUG_FLAGS(ins, flags)					\
+do {								\
+	asm volatile("1:\t" ins "\n"				\
+		     ".pushsection __bug_table,\"a\"\n"		\
+		     "2:" __BUG_REL(1b)				\
+		     "\t.word %c0\n"				\
+		     "\t.org 2b+%c1\n"				\
+		     ".popsection"				\
+		     : : "i" (flags),				\
+			 "i" (sizeof(struct bug_entry)));	\
+} while (0)
+
+#endif
+
 #define BUG()							\
 do {								\
-	asm volatile("ud2");					\
+	_BUG_FLAGS(ASM_UD2, 0);					\
 	unreachable();						\
 } while (0)
-#endif
+
+#define __WARN_TAINT(taint)	_BUG_FLAGS(ASM_UD0, BUGFLAG_TAINT(taint))
 
 #include <asm-generic/bug.h>
 
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -289,9 +289,6 @@ void die(const char *str, struct pt_regs
 	unsigned long flags = oops_begin();
 	int sig = SIGSEGV;
 
-	if (!user_mode(regs))
-		report_bug(regs->ip, regs);
-
 	if (__die(str, regs, err))
 		sig = 0;
 	oops_end(flags, regs, sig);
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -162,15 +162,3 @@ void show_regs(struct pt_regs *regs)
 	}
 	pr_cont("\n");
 }
-
-int is_valid_bugaddr(unsigned long ip)
-{
-	unsigned short ud2;
-
-	if (ip < PAGE_OFFSET)
-		return 0;
-	if (probe_kernel_address((unsigned short *)ip, ud2))
-		return 0;
-
-	return ud2 == 0x0b0f;
-}
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -178,13 +178,3 @@ void show_regs(struct pt_regs *regs)
 	}
 	pr_cont("\n");
 }
-
-int is_valid_bugaddr(unsigned long ip)
-{
-	unsigned short ud2;
-
-	if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
-		return 0;
-
-	return ud2 == 0x0b0f;
-}
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -169,6 +169,41 @@ void ist_end_non_atomic(void)
 	preempt_disable();
 }
 
+int is_valid_bugaddr(unsigned long addr)
+{
+	unsigned short ud;
+
+#ifdef CONFIG_X86_32
+	if (addr < PAGE_OFFSET)
+		return 0;
+#else
+	if (addr > 0)
+		return 0;
+#endif
+	if (probe_kernel_address((unsigned short *)addr, ud))
+		return 0;
+
+	return ud == 0x0b0f || ud == 0xff0f;
+}
+
+static int fixup_bug(struct pt_regs *regs, int trapnr)
+{
+	if (trapnr != X86_TRAP_UD)
+		return 0;
+
+	switch (report_bug(regs->ip, regs)) {
+	case BUG_TRAP_TYPE_NONE:
+	case BUG_TRAP_TYPE_BUG:
+		break;
+
+	case BUG_TRAP_TYPE_WARN:
+		regs->ip += 2;
+		return 1;
+	}
+
+	return 0;
+}
+
 static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 		  struct pt_regs *regs,	long error_code)
@@ -187,12 +222,15 @@ do_trap_no_signal(struct task_struct *ts
 	}
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs, trapnr)) {
-			tsk->thread.error_code = error_code;
-			tsk->thread.trap_nr = trapnr;
-			die(str, regs, error_code);
-		}
-		return 0;
+		if (fixup_exception(regs, trapnr))
+			return 0;
+
+		if (fixup_bug(regs, trapnr))
+			return 0;
+
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_nr = trapnr;
+		die(str, regs, error_code);
 	}
 
 	return -1;
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -8,7 +8,7 @@ else
 	BITS := 64
 endif
 
-obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
+obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \
 	ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
 	stub_$(BITS).o stub_segv.o \
 	sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
--- a/arch/x86/um/bug.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL V2
- */
-
-#include <linux/uaccess.h>
-
-/*
- * Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because
- * that's not relevant in skas mode.
- */
-
-int is_valid_bugaddr(unsigned long eip)
-{
-	unsigned short ud2;
-
-	if (probe_kernel_address((unsigned short __user *)eip, ud2))
-		return 0;
-
-	return ud2 == 0x0b0f;
-}

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 13:28 [PATCH] x86: Implement __WARN using UD0 Peter Zijlstra
@ 2017-02-23 14:09 ` Peter Zijlstra
  2017-02-23 14:59   ` Peter Zijlstra
  2017-02-23 15:09   ` hpa
  2017-02-23 14:12 ` Josh Poimboeuf
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-23 14:09 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, torvalds, arjan, bp, jpoimboe, richard.weinberger

On Thu, Feb 23, 2017 at 02:28:13PM +0100, Peter Zijlstra wrote:
> + * Since various instruction decoders disagree on the length of UD1,
> + * we cannot use it either. So use UD0 for WARN.
> + *
> + * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
> + *  our kernel decoder thinks it takes a ModRM byte, which seems consistent
> + *  with various things like the Intel SDM instruction encoding rules)
> + */
> +
> +#define ASM_UD0		".byte 0x0f, 0xff"
> +#define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */
> +#define ASM_UD2		".byte 0x0f, 0x0b"

http://repo.or.cz/nasm.git/blob/HEAD:/x86/insns.dat

has:

1378 UD0             void                            [       0f ff]                                  186,UNDOC
1379 UD1             void                            [       0f b9]                                  186,UNDOC
1380 UD2B            void                            [       0f b9]                                  186,UNDOC,ND
1381 UD2             void                            [       0f 0b]                                  186
1382 UD2A            void                            [       0f 0b]                                  186,ND

which seems to use the 2 byte version of UD1.

hpa, any input?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 13:28 [PATCH] x86: Implement __WARN using UD0 Peter Zijlstra
  2017-02-23 14:09 ` Peter Zijlstra
@ 2017-02-23 14:12 ` Josh Poimboeuf
  2017-02-23 14:17   ` Borislav Petkov
  2017-02-23 14:36   ` Peter Zijlstra
  2017-02-23 14:14 ` Arjan van de Ven
  2017-02-24 11:16 ` [PATCH -v2] " Peter Zijlstra
  3 siblings, 2 replies; 29+ messages in thread
From: Josh Poimboeuf @ 2017-02-23 14:12 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, linux-kernel,
	torvalds, arjan, bp, richard.weinberger

On Thu, Feb 23, 2017 at 02:28:13PM +0100, Peter Zijlstra wrote:
> +/*
> + * Only UD2 is defined in the Intel SDM and AMD64 docs,
> + * but the interweb provided UD0 and UD1:
> + *
> + *   https://groups.google.com/forum/#!topic/alt.os.assembly/_rS4L0fnqGE

FYI, the latest Intel SDM does have UD0 and UD1.

I guess AMD64 and hypervisors don't need to know about it, since if they
don't, it will still trigger the same invalid opcode exception anyway?

Any idea what the functional difference is between UD0 and UD2?

> + * Since some emulators terminate on UD2, we cannot use it for WARN.

That's too bad, which emulators are those?

> + * Since various instruction decoders disagree on the length of UD1,
> + * we cannot use it either. So use UD0 for WARN.

It shows up as "(bad)" in objtool and gdb, though it still gets the
length right:

  8b9:       0f ff                   (bad)  

It would be nice if the tools knew about it...

-- 
Josh

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 13:28 [PATCH] x86: Implement __WARN using UD0 Peter Zijlstra
  2017-02-23 14:09 ` Peter Zijlstra
  2017-02-23 14:12 ` Josh Poimboeuf
@ 2017-02-23 14:14 ` Arjan van de Ven
  2017-02-23 14:57   ` Peter Zijlstra
  2017-02-24 11:16 ` [PATCH -v2] " Peter Zijlstra
  3 siblings, 1 reply; 29+ messages in thread
From: Arjan van de Ven @ 2017-02-23 14:14 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, torvalds, bp, jpoimboe, richard.weinberger

On 2/23/2017 5:28 AM, Peter Zijlstra wrote:
>
> By using "UD0" for WARNs we remove the function call and its possible
> __FILE__ and __LINE__ immediate arguments from the instruction stream.
>
> Total image size will not change much, what we win in the instruction
> stream we'll loose because of the __bug_table entries. Still, saves on
> I$ footprint and the total image size does go down a bit.

well I am a little sceptical; WARNs are rare so the code (other than the test)
should be waaay out of line already (unlikely() and co).
And I assume you're not removing the __FILE__ and __LINE__ info, since that info
is actually high value for us developers... so what are you actually saving?

(icache saving is only real if the line that the cold code lives on would actually
end up in icache for other reasons; I would hope the compiler puts the out
of line code WAY out of line)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 14:12 ` Josh Poimboeuf
@ 2017-02-23 14:17   ` Borislav Petkov
  2017-02-23 14:36   ` Peter Zijlstra
  1 sibling, 0 replies; 29+ messages in thread
From: Borislav Petkov @ 2017-02-23 14:17 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Peter Zijlstra, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	linux-kernel, torvalds, arjan, richard.weinberger

On Thu, Feb 23, 2017 at 08:12:32AM -0600, Josh Poimboeuf wrote:
> I guess AMD64 and hypervisors don't need to know about it, since if they

So the insns are mentioned in AMD's APMv2, in the instruction tables at
the end. Just not fully documented like UD2.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 14:12 ` Josh Poimboeuf
  2017-02-23 14:17   ` Borislav Petkov
@ 2017-02-23 14:36   ` Peter Zijlstra
  1 sibling, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-23 14:36 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, linux-kernel,
	torvalds, arjan, bp, richard.weinberger

On Thu, Feb 23, 2017 at 08:12:32AM -0600, Josh Poimboeuf wrote:
> On Thu, Feb 23, 2017 at 02:28:13PM +0100, Peter Zijlstra wrote:
> > +/*
> > + * Only UD2 is defined in the Intel SDM and AMD64 docs,
> > + * but the interweb provided UD0 and UD1:
> > + *
> > + *   https://groups.google.com/forum/#!topic/alt.os.assembly/_rS4L0fnqGE
> 
> FYI, the latest Intel SDM does have UD0 and UD1.

Ooh, shiny, a new one.

> > + * Since various instruction decoders disagree on the length of UD1,
> > + * we cannot use it either. So use UD0 for WARN.
> 
> It shows up as "(bad)" in objtool and gdb, though it still gets the
> length right:
> 
>   8b9:       0f ff                   (bad)  
> 
> It would be nice if the tools knew about it...

Now that the SDM lists them, I suppose that's going to be any day now
:-)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 14:14 ` Arjan van de Ven
@ 2017-02-23 14:57   ` Peter Zijlstra
  2017-02-24  7:43     ` Ingo Molnar
  0 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-23 14:57 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, linux-kernel,
	torvalds, bp, jpoimboe, richard.weinberger

On Thu, Feb 23, 2017 at 06:14:45AM -0800, Arjan van de Ven wrote:
> On 2/23/2017 5:28 AM, Peter Zijlstra wrote:
> >
> >By using "UD0" for WARNs we remove the function call and its possible
> >__FILE__ and __LINE__ immediate arguments from the instruction stream.
> >
> >Total image size will not change much, what we win in the instruction
> >stream we'll loose because of the __bug_table entries. Still, saves on
> >I$ footprint and the total image size does go down a bit.
> 
> well I am a little sceptical; WARNs are rare so the code (other than the test)
> should be waaay out of line already (unlikely() and co).

There's only so much you can do in small functions. Sure it tries to
move the crud to the end, but at the end of the day, its still in the
same function.

> And I assume you're not removing the __FILE__ and __LINE__ info, since that info
> is actually high value for us developers... so what are you actually saving?

OK, so going by my own numbers the total image size does not in fact go
down (it did earlier when I initially wrote this patch) :/

I think back when I wrote this I had refcount_t generate inline WARNs
and that generates a huge amount of junk all over the place. This very
much did clear much of that up.

And as said; there's only so much you can do in small functions.

Look at the below for example (frobbed the code to do WARN_ON instead of
WARN), depending on alignment the WARN code will be in the same
cacheline as 'normal' code.


0000000000000016 <refcount_add_not_zero>:
  16:   55                      push   %rbp
  17:   8b 16                   mov    (%rsi),%edx
  19:   41 83 c8 ff             or     $0xffffffff,%r8d
  1d:   48 89 e5                mov    %rsp,%rbp
  20:   85 d2                   test   %edx,%edx
  22:   74 25                   je     49 <refcount_add_not_zero+0x33>
  24:   83 fa ff                cmp    $0xffffffff,%edx
  27:   74 1c                   je     45 <refcount_add_not_zero+0x2f>
  29:   89 d1                   mov    %edx,%ecx
  2b:   89 d0                   mov    %edx,%eax
  2d:   01 f9                   add    %edi,%ecx
  2f:   41 0f 42 c8             cmovb  %r8d,%ecx
  33:   f0 0f b1 0e             lock cmpxchg %ecx,(%rsi)
  37:   39 c2                   cmp    %eax,%edx
  39:   74 04                   je     3f <refcount_add_not_zero+0x29>
  3b:   89 c2                   mov    %eax,%edx
  3d:   eb e1                   jmp    20 <refcount_add_not_zero+0xa>
  3f:   ff c1                   inc    %ecx
  41:   75 02                   jne    45 <refcount_add_not_zero+0x2f>
  43:   0f ff                   (bad)  
  45:   b0 01                   mov    $0x1,%al
  47:   eb 02                   jmp    4b <refcount_add_not_zero+0x35>
  49:   31 c0                   xor    %eax,%eax
  4b:   5d                      pop    %rbp
  4c:   c3                      retq  



0000000000000016 <refcount_add_not_zero>:
  16:   8b 16                   mov    (%rsi),%edx
  18:   41 83 c8 ff             or     $0xffffffff,%r8d
  1c:   85 d2                   test   %edx,%edx
  1e:   74 3b                   je     5b <refcount_add_not_zero+0x45>
  20:   83 fa ff                cmp    $0xffffffff,%edx
  23:   75 03                   jne    28 <refcount_add_not_zero+0x12>
  25:   b0 01                   mov    $0x1,%al
  27:   c3                      retq   
  28:   89 d1                   mov    %edx,%ecx
  2a:   89 d0                   mov    %edx,%eax
  2c:   01 f9                   add    %edi,%ecx
  2e:   41 0f 42 c8             cmovb  %r8d,%ecx
  32:   f0 0f b1 0e             lock cmpxchg %ecx,(%rsi)
  36:   39 c2                   cmp    %eax,%edx
  38:   74 04                   je     3e <refcount_add_not_zero+0x28>
  3a:   89 c2                   mov    %eax,%edx
  3c:   eb de                   jmp    1c <refcount_add_not_zero+0x6>
  3e:   ff c1                   inc    %ecx
  40:   75 e3                   jne    25 <refcount_add_not_zero+0xf>
  42:   55                      push   %rbp
  43:   be 3f 00 00 00          mov    $0x3f,%esi
  48:   48 c7 c7 00 00 00 00    mov    $0x0,%rdi
                        4b: R_X86_64_32S        .rodata.str1.1
  4f:   48 89 e5                mov    %rsp,%rbp
  52:   e8 00 00 00 00          callq  57 <refcount_add_not_zero+0x41>
                        53: R_X86_64_PC32       warn_slowpath_null-0x4
  57:   b0 01                   mov    $0x1,%al
  59:   5d                      pop    %rbp
  5a:   c3                      retq   
  5b:   31 c0                   xor    %eax,%eax
  5d:   c3                      retq   

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 14:09 ` Peter Zijlstra
@ 2017-02-23 14:59   ` Peter Zijlstra
  2017-02-23 15:09   ` hpa
  1 sibling, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-23 14:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, torvalds, arjan, bp, jpoimboe, richard.weinberger

On Thu, Feb 23, 2017 at 03:09:29PM +0100, Peter Zijlstra wrote:
> On Thu, Feb 23, 2017 at 02:28:13PM +0100, Peter Zijlstra wrote:
> > + * Since various instruction decoders disagree on the length of UD1,
> > + * we cannot use it either. So use UD0 for WARN.
> > + *
> > + * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
> > + *  our kernel decoder thinks it takes a ModRM byte, which seems consistent
> > + *  with various things like the Intel SDM instruction encoding rules)
> > + */
> > +
> > +#define ASM_UD0		".byte 0x0f, 0xff"
> > +#define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */
> > +#define ASM_UD2		".byte 0x0f, 0x0b"
> 
> http://repo.or.cz/nasm.git/blob/HEAD:/x86/insns.dat
> 
> has:
> 
> 1378 UD0             void                            [       0f ff]                                  186,UNDOC
> 1379 UD1             void                            [       0f b9]                                  186,UNDOC
> 1380 UD2B            void                            [       0f b9]                                  186,UNDOC,ND
> 1381 UD2             void                            [       0f 0b]                                  186
> 1382 UD2A            void                            [       0f 0b]                                  186,ND
> 
> which seems to use the 2 byte version of UD1.
> 
> hpa, any input?

N/m, as Josh said, they're listed in the latest SDM (Dec 2016) and that
lists UD1 /r, so 3 bytes.

This means binutils decodes it wrong.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 14:09 ` Peter Zijlstra
  2017-02-23 14:59   ` Peter Zijlstra
@ 2017-02-23 15:09   ` hpa
  2017-02-23 15:23     ` Peter Zijlstra
  1 sibling, 1 reply; 29+ messages in thread
From: hpa @ 2017-02-23 15:09 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner, Ingo Molnar
  Cc: linux-kernel, torvalds, arjan, bp, jpoimboe, richard.weinberger

On February 23, 2017 6:09:29 AM PST, Peter Zijlstra <peterz@infradead.org> wrote:
>On Thu, Feb 23, 2017 at 02:28:13PM +0100, Peter Zijlstra wrote:
>> + * Since various instruction decoders disagree on the length of UD1,
>> + * we cannot use it either. So use UD0 for WARN.
>> + *
>> + * (binutils knows about "ud1" but {en,de}codes it as 2 bytes,
>whereas
>> + *  our kernel decoder thinks it takes a ModRM byte, which seems
>consistent
>> + *  with various things like the Intel SDM instruction encoding
>rules)
>> + */
>> +
>> +#define ASM_UD0		".byte 0x0f, 0xff"
>> +#define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */
>> +#define ASM_UD2		".byte 0x0f, 0x0b"
>
>http://repo.or.cz/nasm.git/blob/HEAD:/x86/insns.dat
>
>has:
>
>1378 UD0             void                            [       0f ff]    
>                             186,UNDOC
>1379 UD1             void                            [       0f b9]    
>                             186,UNDOC
>1380 UD2B            void                            [       0f b9]    
>                             186,UNDOC,ND
>1381 UD2             void                            [       0f 0b]    
>                             186
>1382 UD2A            void                            [       0f 0b]    
>                             186,ND
>
>which seems to use the 2 byte version of UD1.
>
>hpa, any input?

Well, it only matters if the instruction extends past a segment boundary or page.  However, the CPU instruction decoder will consume a modrm for UD1, and so using just the two opcode bytes may cause a #PF or #GP when a #UD was intended.

This was documented very recently and Nasm hasn't caught up yet (hence the UNDOC flag.)
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 15:09   ` hpa
@ 2017-02-23 15:23     ` Peter Zijlstra
  2017-02-23 15:32       ` hpa
  0 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-23 15:23 UTC (permalink / raw)
  To: hpa
  Cc: Thomas Gleixner, Ingo Molnar, linux-kernel, torvalds, arjan, bp,
	jpoimboe, richard.weinberger

On Thu, Feb 23, 2017 at 07:09:05AM -0800, hpa@zytor.com wrote:
> Well, it only matters if the instruction extends past a segment
> boundary or page.  However, the CPU instruction decoder will consume a
> modrm for UD1, and so using just the two opcode bytes may cause a #PF
> or #GP when a #UD was intended.

It also matters if you want the decoded instruction stream to make
sense.

If for instance I use UD1 without the ModRM byte for WARN, objtool gets
mighty confused because the instruction stream doesn't decode properly.

objtool will also consume the extra byte and then the next instruction
is offset and decodes wrong and it stresses out.

Similarly, if you were to do objdump (and objdump were to actually
correctly decode UD1) then the resulting asm would make no sense.

The kernel will work 'fine', because even without ModRM it will #UD, and
the #UD handler will IP+=2 and all is well, but it becomes impossible to
actually decode the function..

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 15:23     ` Peter Zijlstra
@ 2017-02-23 15:32       ` hpa
  2017-02-23 16:03         ` Borislav Petkov
  0 siblings, 1 reply; 29+ messages in thread
From: hpa @ 2017-02-23 15:32 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Ingo Molnar, linux-kernel, torvalds, arjan, bp,
	jpoimboe, richard.weinberger

On February 23, 2017 7:23:09 AM PST, Peter Zijlstra <peterz@infradead.org> wrote:
>On Thu, Feb 23, 2017 at 07:09:05AM -0800, hpa@zytor.com wrote:
>> Well, it only matters if the instruction extends past a segment
>> boundary or page.  However, the CPU instruction decoder will consume
>a
>> modrm for UD1, and so using just the two opcode bytes may cause a #PF
>> or #GP when a #UD was intended.
>
>It also matters if you want the decoded instruction stream to make
>sense.
>
>If for instance I use UD1 without the ModRM byte for WARN, objtool gets
>mighty confused because the instruction stream doesn't decode properly.
>
>objtool will also consume the extra byte and then the next instruction
>is offset and decodes wrong and it stresses out.
>
>Similarly, if you were to do objdump (and objdump were to actually
>correctly decode UD1) then the resulting asm would make no sense.
>
>The kernel will work 'fine', because even without ModRM it will #UD,
>and
>the #UD handler will IP+=2 and all is well, but it becomes impossible
>to
>actually decode the function..

Well, once you are using invalid instructions, it depends not on what the CPU decodes but what your own handler expects.  Consider Microsoft's use of C4 C4 /ib as a meta-instruction (called BOP, "BIOS operation")... that format has nothing to do with the CPU, but if you want to disassemble the resulting code you need to know about how they encode BOP.

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 15:32       ` hpa
@ 2017-02-23 16:03         ` Borislav Petkov
  0 siblings, 0 replies; 29+ messages in thread
From: Borislav Petkov @ 2017-02-23 16:03 UTC (permalink / raw)
  To: hpa
  Cc: Peter Zijlstra, Thomas Gleixner, Ingo Molnar, linux-kernel,
	torvalds, arjan, jpoimboe, richard.weinberger

On Thu, Feb 23, 2017 at 07:32:07AM -0800, hpa@zytor.com wrote:
> Well, once you are using invalid instructions, it depends not on
> what the CPU decodes but what your own handler expects. Consider
> Microsoft's use of C4 C4 /ib as a meta-instruction (called BOP, "BIOS
> operation")... that format has nothing to do with the CPU, but if you
> want to disassemble the resulting code you need to know about how they
> encode BOP.

How do they use that?

They rely on the fact that C4 C4 is going to #UD as it is an invalid VEX
insn? The second C4 selecting the 100b map which is reserved?

Or?

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-23 14:57   ` Peter Zijlstra
@ 2017-02-24  7:43     ` Ingo Molnar
  2017-02-24  8:31       ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2017-02-24  7:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arjan van de Ven, Thomas Gleixner, H. Peter Anvin, linux-kernel,
	torvalds, bp, jpoimboe, richard.weinberger, Andrew Morton


* Peter Zijlstra <peterz@infradead.org> wrote:

> 0000000000000016 <refcount_add_not_zero>:
>   16:   55                      push   %rbp
>   17:   8b 16                   mov    (%rsi),%edx
>   19:   41 83 c8 ff             or     $0xffffffff,%r8d
>   1d:   48 89 e5                mov    %rsp,%rbp
>   20:   85 d2                   test   %edx,%edx
>   22:   74 25                   je     49 <refcount_add_not_zero+0x33>
>   24:   83 fa ff                cmp    $0xffffffff,%edx
>   27:   74 1c                   je     45 <refcount_add_not_zero+0x2f>
>   29:   89 d1                   mov    %edx,%ecx
>   2b:   89 d0                   mov    %edx,%eax
>   2d:   01 f9                   add    %edi,%ecx
>   2f:   41 0f 42 c8             cmovb  %r8d,%ecx
>   33:   f0 0f b1 0e             lock cmpxchg %ecx,(%rsi)
>   37:   39 c2                   cmp    %eax,%edx
>   39:   74 04                   je     3f <refcount_add_not_zero+0x29>
>   3b:   89 c2                   mov    %eax,%edx
>   3d:   eb e1                   jmp    20 <refcount_add_not_zero+0xa>
>   3f:   ff c1                   inc    %ecx
>   41:   75 02                   jne    45 <refcount_add_not_zero+0x2f>
>   43:   0f ff                   (bad)  
>   45:   b0 01                   mov    $0x1,%al
>   47:   eb 02                   jmp    4b <refcount_add_not_zero+0x35>
>   49:   31 c0                   xor    %eax,%eax
>   4b:   5d                      pop    %rbp
>   4c:   c3                      retq  

BTW., one thing that is probably not represented fairly by this example is the 
better, much lower register clobbering impact of trap-driven WARN_ON() versus 
function call driven WARN_ON(): the trap will preserve all registers, while a call 
driven slow path will clobber all the caller-save registers.

In this example this does not show up much because the WARN_ON() is done at the 
end of the function. In function where WARN()s are emitted earlier the size of the 
slow path should be even better.

In any case, I like your patch, I believe what counts is the .text size reduction:

  text            data     bss    dec              hex    filename                     size

  10503189        4442584  843776 15789549         f0eded defconfig-build/vmlinux.pre  25242904
  10483798        4442584  843776 15770158         f0a22e defconfig-build/vmlinux.post 25243504

Put differently: for essentially zero RAM and runtime cost we've bought a 0.2% 
larger (instruction) cache (!!). That's quite significant, IMHO, as lots of kernel 
intense workloads involve many small functions.

The only high level question is whether we trust the trap machinery to generate 
WARN_ON()s. I believe we do.

BTW.: why not use INT3 instead of all these weird #UD opcodes? It's a single byte 
opcode and we can do a quick exception table search in do_debug(). This way we'll 
also have irqs disabled which might help getting the message out before any irq 
handler comes in and muddies the waters.

In a sense WARN_ON()s and BUG_ON()s can be considered permanently installed 
in-line kprobes, with a special, built-in handler.

BTW. #2: side note, GCC generated crap code here. Why didn't it do:

>   3f:   ff c1                   inc    %ecx
>   41:   75 02                   jne    55 <refcount_add_not_zero+0x2f>
>   45:   b0 01                   mov    $0x1,%al
>   4b:   5d                      pop    %rbp
>   4c:   c3                      retq  
>
>   49:   31 c0                   xor    %eax,%eax
>   4b:   5d                      pop    %rbp
>   4c:   c3                      retq  
>
>   55:   0f ff                   (bad)  

?

It's hand edited so the offsets are wrong, but the idea is that for the same 14 
bytes we get a straight fall-through fast path to the RETQ and one JMP less 
executed in the 'return 1' case. Both the 'return 0' case and the #UD are in tail 
part of the function.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-24  7:43     ` Ingo Molnar
@ 2017-02-24  8:31       ` Peter Zijlstra
  2017-02-24  9:06         ` hpa
  2017-02-24  9:11         ` hpa
  0 siblings, 2 replies; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-24  8:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Thomas Gleixner, H. Peter Anvin, linux-kernel,
	torvalds, bp, jpoimboe, richard.weinberger, Andrew Morton

On Fri, Feb 24, 2017 at 08:43:25AM +0100, Ingo Molnar wrote:
> The only high level question is whether we trust the trap machinery to generate 
> WARN_ON()s. I believe we do.
> 
> BTW.: why not use INT3 instead of all these weird #UD opcodes? It's a single byte 
> opcode and we can do a quick exception table search in do_debug(). This way we'll 
> also have irqs disabled which might help getting the message out before any irq 
> handler comes in and muddies the waters.
> 
> In a sense WARN_ON()s and BUG_ON()s can be considered permanently installed 
> in-line kprobes, with a special, built-in handler.

I've actually been looking into that. There's a bunch of 'fun' details
that I've been checking, but I think I can make that happen.

My initial patch extended the existing UD2 BUG trap to include the WARN,
this is what many other architectures already do. Arjan then complained
that some emulators terminate on UD2 and could I please not use that for
WARN, at which point Borislav called my attention to UD0/UD1.

So I made the UD0 change and posted (fwiw, there's a lost refresh in the
patch I posted and it will not actually work).

I think I'll post an update of said patch and then attempt to do the
INT3 thing in a later patch -- that will require at least one new knob
in the generic BUG code ...

> BTW. #2: side note, GCC generated crap code here. Why didn't it do:

I've seen GCC do 'wonderful' things the past few weeks. Absolutely mind
boggling stuff.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-24  8:31       ` Peter Zijlstra
@ 2017-02-24  9:06         ` hpa
  2017-02-24  9:11         ` hpa
  1 sibling, 0 replies; 29+ messages in thread
From: hpa @ 2017-02-24  9:06 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: Arjan van de Ven, Thomas Gleixner, linux-kernel, torvalds, bp,
	jpoimboe, richard.weinberger, Andrew Morton

On February 24, 2017 12:31:15 AM PST, Peter Zijlstra <peterz@infradead.org> wrote:
>On Fri, Feb 24, 2017 at 08:43:25AM +0100, Ingo Molnar wrote:
>> The only high level question is whether we trust the trap machinery
>to generate 
>> WARN_ON()s. I believe we do.
>> 
>> BTW.: why not use INT3 instead of all these weird #UD opcodes? It's a
>single byte 
>> opcode and we can do a quick exception table search in do_debug().
>This way we'll 
>> also have irqs disabled which might help getting the message out
>before any irq 
>> handler comes in and muddies the waters.
>> 
>> In a sense WARN_ON()s and BUG_ON()s can be considered permanently
>installed 
>> in-line kprobes, with a special, built-in handler.
>
>I've actually been looking into that. There's a bunch of 'fun' details
>that I've been checking, but I think I can make that happen.
>
>My initial patch extended the existing UD2 BUG trap to include the
>WARN,
>this is what many other architectures already do. Arjan then complained
>that some emulators terminate on UD2 and could I please not use that
>for
>WARN, at which point Borislav called my attention to UD0/UD1.
>
>So I made the UD0 change and posted (fwiw, there's a lost refresh in
>the
>patch I posted and it will not actually work).
>
>I think I'll post an update of said patch and then attempt to do the
>INT3 thing in a later patch -- that will require at least one new knob
>in the generic BUG code ...
>
>> BTW. #2: side note, GCC generated crap code here. Why didn't it do:
>
>I've seen GCC do 'wonderful' things the past few weeks. Absolutely mind
>boggling stuff.

Terminating on UD2 is really obnoxious, btw. JMPE, ICEBP/INT1, some I/O port or MSR would be appropriate choices for that, but UD2 is rude in the extreme.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-24  8:31       ` Peter Zijlstra
  2017-02-24  9:06         ` hpa
@ 2017-02-24  9:11         ` hpa
  2017-02-24  9:15           ` Ingo Molnar
  1 sibling, 1 reply; 29+ messages in thread
From: hpa @ 2017-02-24  9:11 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: Arjan van de Ven, Thomas Gleixner, linux-kernel, torvalds, bp,
	jpoimboe, richard.weinberger, Andrew Morton

On February 24, 2017 12:31:15 AM PST, Peter Zijlstra <peterz@infradead.org> wrote:
>On Fri, Feb 24, 2017 at 08:43:25AM +0100, Ingo Molnar wrote:
>> The only high level question is whether we trust the trap machinery
>to generate 
>> WARN_ON()s. I believe we do.
>> 
>> BTW.: why not use INT3 instead of all these weird #UD opcodes? It's a
>single byte 
>> opcode and we can do a quick exception table search in do_debug().
>This way we'll 
>> also have irqs disabled which might help getting the message out
>before any irq 
>> handler comes in and muddies the waters.
>> 
>> In a sense WARN_ON()s and BUG_ON()s can be considered permanently
>installed 
>> in-line kprobes, with a special, built-in handler.
>
>I've actually been looking into that. There's a bunch of 'fun' details
>that I've been checking, but I think I can make that happen.
>
>My initial patch extended the existing UD2 BUG trap to include the
>WARN,
>this is what many other architectures already do. Arjan then complained
>that some emulators terminate on UD2 and could I please not use that
>for
>WARN, at which point Borislav called my attention to UD0/UD1.
>
>So I made the UD0 change and posted (fwiw, there's a lost refresh in
>the
>patch I posted and it will not actually work).
>
>I think I'll post an update of said patch and then attempt to do the
>INT3 thing in a later patch -- that will require at least one new knob
>in the generic BUG code ...
>
>> BTW. #2: side note, GCC generated crap code here. Why didn't it do:
>
>I've seen GCC do 'wonderful' things the past few weeks. Absolutely mind
>boggling stuff.

Incidentally, as an alternative to a #UD, int $9 could be an alternative (exception vector 9 was discontinued with the 486.)
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-24  9:11         ` hpa
@ 2017-02-24  9:15           ` Ingo Molnar
  2017-02-24  9:46             ` Borislav Petkov
       [not found]             ` <21ac6e53-2fcd-52e9-e72d-9faf7da14d1e@zytor.com>
  0 siblings, 2 replies; 29+ messages in thread
From: Ingo Molnar @ 2017-02-24  9:15 UTC (permalink / raw)
  To: hpa
  Cc: Peter Zijlstra, Arjan van de Ven, Thomas Gleixner, linux-kernel,
	torvalds, bp, jpoimboe, richard.weinberger, Andrew Morton


* hpa@zytor.com <hpa@zytor.com> wrote:

> Incidentally, as an alternative to a #UD, int $9 could be an alternative 
> (exception vector 9 was discontinued with the 486.)

With thousands of call sites a one byte opcode would be preferred, that's why INT3 
is so nice.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-24  9:15           ` Ingo Molnar
@ 2017-02-24  9:46             ` Borislav Petkov
  2017-02-24  9:52               ` H. Peter Anvin
       [not found]             ` <21ac6e53-2fcd-52e9-e72d-9faf7da14d1e@zytor.com>
  1 sibling, 1 reply; 29+ messages in thread
From: Borislav Petkov @ 2017-02-24  9:46 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: hpa, Peter Zijlstra, Arjan van de Ven, Thomas Gleixner,
	linux-kernel, torvalds, jpoimboe, richard.weinberger,
	Andrew Morton

On Fri, Feb 24, 2017 at 10:15:37AM +0100, Ingo Molnar wrote:
> With thousands of call sites a one byte opcode would be preferred, that's why INT3 
> is so nice.

ICEBP is also a single-byte: F1. And should be pretty unused :)

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-24  9:46             ` Borislav Petkov
@ 2017-02-24  9:52               ` H. Peter Anvin
  0 siblings, 0 replies; 29+ messages in thread
From: H. Peter Anvin @ 2017-02-24  9:52 UTC (permalink / raw)
  To: Borislav Petkov, Ingo Molnar
  Cc: Peter Zijlstra, Arjan van de Ven, Thomas Gleixner, linux-kernel,
	torvalds, jpoimboe, richard.weinberger, Andrew Morton

On 02/24/17 01:46, Borislav Petkov wrote:
> On Fri, Feb 24, 2017 at 10:15:37AM +0100, Ingo Molnar wrote:
>> With thousands of call sites a one byte opcode would be preferred, that's why INT3 
>> is so nice.
> 
> ICEBP is also a single-byte: F1. And should be pretty unused :)
> 

That would make debugging preproduction hardware on Linux a living
nightmare.

	-hpa

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
       [not found]             ` <21ac6e53-2fcd-52e9-e72d-9faf7da14d1e@zytor.com>
@ 2017-02-24 10:41               ` Peter Zijlstra
  2017-02-25 10:38                 ` Borislav Petkov
  0 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-24 10:41 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, Arjan van de Ven, Thomas Gleixner, linux-kernel,
	torvalds, bp, jpoimboe, richard.weinberger, Andrew Morton

On Fri, Feb 24, 2017 at 01:53:27AM -0800, H. Peter Anvin wrote:
> On 02/24/17 01:15, Ingo Molnar wrote:
> > 
> > * hpa@zytor.com <hpa@zytor.com> wrote:
> > 
> >> Incidentally, as an alternative to a #UD, int $9 could be an alternative 
> >> (exception vector 9 was discontinued with the 486.)
> > 
> > With thousands of call sites a one byte opcode would be preferred, that's why INT3 
> > is so nice.
> > 
> 
> Yes, but using INT3 would bugger up debuggers.

So I've been looking into this, in general debuggers will refuse to poke
INT3 on top of INT3 (sane and obvious behaviour).

Also, we'd run any actual handlers before fixup_bug(), the only thing
here is that we need to also do fixup_bug() if the instruction pointer
isn't changed. This would allow debuggers to effectively stack on top
of the already present breakpoint.

Optimized kprobes will make sure to not clobber INT3 (although it will
happily clobber anything else from __ex_table[]).

The only other point is that currently report_bug() relies on
is_valid_bugaddr() to tell if the instruction is 'good', but then
reports a BUG if there is no entry in __bug_table[]. We'd have to change
that to be strict and assume __bug_table[] is complete, and report
BUG_TRAP_TYPE_NONE when there is no __bug_table[] entry, this would also
avoid the need for is_valid_bugaddr().

So yes, its tricky but it could be done. A new single byte #UD
instruction would be much nicer though.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH -v2] x86: Implement __WARN using UD0
  2017-02-23 13:28 [PATCH] x86: Implement __WARN using UD0 Peter Zijlstra
                   ` (2 preceding siblings ...)
  2017-02-23 14:14 ` Arjan van de Ven
@ 2017-02-24 11:16 ` Peter Zijlstra
  2017-02-25  8:19   ` [RFC][PATCH] bug: Add _ONCE logic to report_bug() Peter Zijlstra
  3 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-24 11:16 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, torvalds, arjan, bp, jpoimboe, richard.weinberger


By using "UD0" for WARNs we remove the function call and its possible
__FILE__ and __LINE__ immediate arguments from the instruction stream.

Total image size will not change much, what we win in the instrucion
stream we'll loose because of the __bug_table entries. Still, saves on
I$ footprint and register pressure at the callsites.

  text            data     bss    dec              hex    filename                     size

  10503189        4442584  843776 15789549         f0eded defconfig-build/vmlinux.pre  25242904
  10483798        4442584  843776 15770158         f0a22e defconfig-build/vmlinux.post 25243504

Cc: Richard Weinberger <richard.weinberger@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
-v2:

 - Added ASM comments to the __bug_table[] entries describing the fields
   (requested by bpetkov).
 - Added /* */ markers to the #else and #endif (also bpetkov)
 - Fixed the x86_64 kernel address test in is_valid_bugaddr().
 - Removed the random interweb link because the latest Intel SDM
   (Dec'16) actually lists the UD0/UD1/UD2 instructions.

 arch/um/Kconfig.common         |    5 ---
 arch/x86/include/asm/bug.h     |   68 ++++++++++++++++++++++++++++++-----------
 arch/x86/kernel/dumpstack.c    |    3 -
 arch/x86/kernel/dumpstack_32.c |   12 -------
 arch/x86/kernel/dumpstack_64.c |   10 ------
 arch/x86/kernel/traps.c        |   50 ++++++++++++++++++++++++++----
 arch/x86/um/Makefile           |    2 -
 arch/x86/um/bug.c              |   21 ------------
 8 files changed, 96 insertions(+), 75 deletions(-)

--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -50,11 +50,6 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
-config GENERIC_BUG
-	bool
-	default y
-	depends on BUG
-
 config HZ
 	int
 	default 100
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -1,36 +1,70 @@
 #ifndef _ASM_X86_BUG_H
 #define _ASM_X86_BUG_H
 
+#include <linux/stringify.h>
+
 #define HAVE_ARCH_BUG
 
-#ifdef CONFIG_DEBUG_BUGVERBOSE
+/*
+ * Since some emulators terminate on UD2, we cannot use it for WARN.
+ * Since various instruction decoders disagree on the length of UD1,
+ * we cannot use it either. So use UD0 for WARN.
+ *
+ * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
+ *  our kernel decoder thinks it takes a ModRM byte, which seems consistent
+ *  with various things like the Intel SDM instruction encoding rules)
+ */
+
+#define ASM_UD0		".byte 0x0f, 0xff"
+#define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */
+#define ASM_UD2		".byte 0x0f, 0x0b"
 
 #ifdef CONFIG_X86_32
-# define __BUG_C0	"2:\t.long 1b, %c0\n"
+# define __BUG_REL(val)	".long " __stringify(val)
 #else
-# define __BUG_C0	"2:\t.long 1b - 2b, %c0 - 2b\n"
+# define __BUG_REL(val)	".long " __stringify(val) " - 2b"
 #endif
 
-#define BUG()							\
-do {								\
-	asm volatile("1:\tud2\n"				\
-		     ".pushsection __bug_table,\"a\"\n"		\
-		     __BUG_C0					\
-		     "\t.word %c1, 0\n"				\
-		     "\t.org 2b+%c2\n"				\
-		     ".popsection"				\
-		     : : "i" (__FILE__), "i" (__LINE__),	\
-		     "i" (sizeof(struct bug_entry)));		\
-	unreachable();						\
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define _BUG_FLAGS(ins, flags)						\
+do {									\
+	asm volatile("1:\t" ins "\n"					\
+		     ".pushsection __bug_table,\"a\"\n"			\
+		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
+		     "\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"	\
+		     "\t.word %c1"        "\t# bug_entry::line\n"	\
+		     "\t.word %c2"        "\t# bug_entry::flags\n"	\
+		     "\t.org 2b+%c3\n"					\
+		     ".popsection"					\
+		     : : "i" (__FILE__), "i" (__LINE__),		\
+			 "i" (flags),					\
+			 "i" (sizeof(struct bug_entry)));		\
 } while (0)
 
-#else
+#else /* !CONFIG_DEBUG_BUGVERBOSE */
+
+#define _BUG_FLAGS(ins, flags)						\
+do {									\
+	asm volatile("1:\t" ins "\n"					\
+		     ".pushsection __bug_table,\"a\"\n"			\
+		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
+		     "\t.word %c0"        "\t# bug_entry::flags\n"	\
+		     "\t.org 2b+%c1\n"					\
+		     ".popsection"					\
+		     : : "i" (flags),					\
+			 "i" (sizeof(struct bug_entry)));		\
+} while (0)
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
 #define BUG()							\
 do {								\
-	asm volatile("ud2");					\
+	_BUG_FLAGS(ASM_UD2, 0);					\
 	unreachable();						\
 } while (0)
-#endif
+
+#define __WARN_TAINT(taint)	_BUG_FLAGS(ASM_UD0, BUGFLAG_TAINT(taint))
 
 #include <asm-generic/bug.h>
 
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -289,9 +289,6 @@ void die(const char *str, struct pt_regs
 	unsigned long flags = oops_begin();
 	int sig = SIGSEGV;
 
-	if (!user_mode(regs))
-		report_bug(regs->ip, regs);
-
 	if (__die(str, regs, err))
 		sig = 0;
 	oops_end(flags, regs, sig);
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -162,15 +162,3 @@ void show_regs(struct pt_regs *regs)
 	}
 	pr_cont("\n");
 }
-
-int is_valid_bugaddr(unsigned long ip)
-{
-	unsigned short ud2;
-
-	if (ip < PAGE_OFFSET)
-		return 0;
-	if (probe_kernel_address((unsigned short *)ip, ud2))
-		return 0;
-
-	return ud2 == 0x0b0f;
-}
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -178,13 +178,3 @@ void show_regs(struct pt_regs *regs)
 	}
 	pr_cont("\n");
 }
-
-int is_valid_bugaddr(unsigned long ip)
-{
-	unsigned short ud2;
-
-	if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
-		return 0;
-
-	return ud2 == 0x0b0f;
-}
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -169,6 +169,41 @@ void ist_end_non_atomic(void)
 	preempt_disable();
 }
 
+int is_valid_bugaddr(unsigned long addr)
+{
+	unsigned short ud;
+
+#ifdef CONFIG_X86_32
+	if (addr < PAGE_OFFSET)
+		return 0;
+#else
+	if ((long)addr > 0)
+		return 0;
+#endif
+	if (probe_kernel_address((unsigned short *)addr, ud))
+		return 0;
+
+	return ud == 0x0b0f || ud == 0xff0f;
+}
+
+static int fixup_bug(struct pt_regs *regs, int trapnr)
+{
+	if (trapnr != X86_TRAP_UD)
+		return 0;
+
+	switch (report_bug(regs->ip, regs)) {
+	case BUG_TRAP_TYPE_NONE:
+	case BUG_TRAP_TYPE_BUG:
+		break;
+
+	case BUG_TRAP_TYPE_WARN:
+		regs->ip += 2;
+		return 1;
+	}
+
+	return 0;
+}
+
 static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 		  struct pt_regs *regs,	long error_code)
@@ -187,12 +222,15 @@ do_trap_no_signal(struct task_struct *ts
 	}
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs, trapnr)) {
-			tsk->thread.error_code = error_code;
-			tsk->thread.trap_nr = trapnr;
-			die(str, regs, error_code);
-		}
-		return 0;
+		if (fixup_exception(regs, trapnr))
+			return 0;
+
+		if (fixup_bug(regs, trapnr))
+			return 0;
+
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_nr = trapnr;
+		die(str, regs, error_code);
 	}
 
 	return -1;
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -8,7 +8,7 @@ else
 	BITS := 64
 endif
 
-obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
+obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \
 	ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
 	stub_$(BITS).o stub_segv.o \
 	sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
--- a/arch/x86/um/bug.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL V2
- */
-
-#include <linux/uaccess.h>
-
-/*
- * Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because
- * that's not relevant in skas mode.
- */
-
-int is_valid_bugaddr(unsigned long eip)
-{
-	unsigned short ud2;
-
-	if (probe_kernel_address((unsigned short __user *)eip, ud2))
-		return 0;
-
-	return ud2 == 0x0b0f;
-}

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [RFC][PATCH] bug: Add _ONCE logic to report_bug()
  2017-02-24 11:16 ` [PATCH -v2] " Peter Zijlstra
@ 2017-02-25  8:19   ` Peter Zijlstra
  2017-02-25  9:18     ` Ingo Molnar
  0 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-25  8:19 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, torvalds, arjan, bp, jpoimboe, richard.weinberger


Josh suggested moving the _ONCE logic inside the trap handler, using a
bit in the bug_entry::flags field, avoiding the need for the extra
variable.

Sadly this only works for WARN_ON_ONCE(), since the others have
printk() statements prior to triggering the trap.

Still, this saves some text and data:

  text            data     bss    dec              hex    filename
  10469505        4443448  843776 15756729         f06db9 defconfig-build/vmlinux-ud0
  10452803        4442616  843776 15739195         f0293b defconfig-build/vmlinux-ud0-once

(Only compile tested on x86_64 so far.)

Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/arm64/include/asm/bug.h      |    2 +-
 arch/parisc/include/asm/bug.h     |    8 ++++----
 arch/powerpc/include/asm/bug.h    |    4 ++--
 arch/s390/include/asm/bug.h       |    4 ++--
 arch/sh/include/asm/bug.h         |    4 ++--
 arch/x86/include/asm/bug.h        |    2 +-
 include/asm-generic/bug.h         |   18 +++++++++++++++++-
 include/asm-generic/vmlinux.lds.h |    3 +--
 include/linux/bug.h               |    2 +-
 lib/bug.c                         |   28 ++++++++++++++++++++--------
 10 files changed, 51 insertions(+), 24 deletions(-)

--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -55,7 +55,7 @@ _BUGVERBOSE_LOCATION(__FILE__, __LINE__)
 	unreachable();				\
 } while (0)
 
-#define __WARN_TAINT(taint) _BUG_FLAGS(BUGFLAG_TAINT(taint))
+#define __WARN_FLAGS(flags) _BUG_FLAGS(BUGFLAG_WARNING|(flags))
 
 #endif /* ! CONFIG_GENERIC_BUG */
 
--- a/arch/parisc/include/asm/bug.h
+++ b/arch/parisc/include/asm/bug.h
@@ -46,7 +46,7 @@
 #endif
 
 #ifdef CONFIG_DEBUG_BUGVERBOSE
-#define __WARN_TAINT(taint)						\
+#define __WARN_FLAGS(flags)						\
 	do {								\
 		asm volatile("\n"					\
 			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
@@ -56,11 +56,11 @@
 			     "\t.org 2b+%c3\n"				\
 			     "\t.popsection"				\
 			     : : "i" (__FILE__), "i" (__LINE__),	\
-			     "i" (BUGFLAG_TAINT(taint)), 		\
+			     "i" (BUGFLAG_WARNING|(flags)),		\
 			     "i" (sizeof(struct bug_entry)) );		\
 	} while(0)
 #else
-#define __WARN_TAINT(taint)						\
+#define __WARN_FLAGS(flags)						\
 	do {								\
 		asm volatile("\n"					\
 			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
@@ -69,7 +69,7 @@
 			     "\t.short %c0\n"				\
 			     "\t.org 2b+%c1\n"				\
 			     "\t.popsection"				\
-			     : : "i" (BUGFLAG_TAINT(taint)),		\
+			     : : "i" (BUGFLAG_WARNING|(flags)),		\
 			     "i" (sizeof(struct bug_entry)) );		\
 	} while(0)
 #endif
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -85,12 +85,12 @@
 	}							\
 } while (0)
 
-#define __WARN_TAINT(taint) do {				\
+#define __WARN_FLAGS(flags) do {				\
 	__asm__ __volatile__(					\
 		"1:	twi 31,0,0\n"				\
 		_EMIT_BUG_ENTRY					\
 		: : "i" (__FILE__), "i" (__LINE__),		\
-		  "i" (BUGFLAG_TAINT(taint)),			\
+		  "i" (BUGFLAG_WARNING|(flags)),		\
 		  "i" (sizeof(struct bug_entry)));		\
 } while (0)
 
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -46,8 +46,8 @@
 	unreachable();					\
 } while (0)
 
-#define __WARN_TAINT(taint) do {			\
-	__EMIT_BUG(BUGFLAG_TAINT(taint));		\
+#define __WARN_FLAGS(flags) do {			\
+	__EMIT_BUG(BUGFLAG_WARNING|(flags));		\
 } while (0)
 
 #define WARN_ON(x) ({					\
--- a/arch/sh/include/asm/bug.h
+++ b/arch/sh/include/asm/bug.h
@@ -50,7 +50,7 @@ do {							\
 		   "i" (sizeof(struct bug_entry)));	\
 } while (0)
 
-#define __WARN_TAINT(taint)				\
+#define __WARN_FLAGS(flags)				\
 do {							\
 	__asm__ __volatile__ (				\
 		"1:\t.short %O0\n"			\
@@ -59,7 +59,7 @@ do {							\
 		 : "n" (TRAPA_BUG_OPCODE),		\
 		   "i" (__FILE__),			\
 		   "i" (__LINE__),			\
-		   "i" (BUGFLAG_TAINT(taint)),		\
+		   "i" (BUGFLAG_WARNING|(flags)),	\
 		   "i" (sizeof(struct bug_entry)));	\
 } while (0)
 
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -64,7 +64,7 @@ do {								\
 	unreachable();						\
 } while (0)
 
-#define __WARN_TAINT(taint)	_BUG_FLAGS(ASM_UD0, BUGFLAG_TAINT(taint))
+#define __WARN_FLAGS(flags)	_BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
 
 #include <asm-generic/bug.h>
 
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -5,6 +5,8 @@
 
 #ifdef CONFIG_GENERIC_BUG
 #define BUGFLAG_WARNING		(1 << 0)
+#define BUGFLAG_ONCE		(1 << 1)
+#define BUGFLAG_DONE		(1 << 2)
 #define BUGFLAG_TAINT(taint)	(BUGFLAG_WARNING | ((taint) << 8))
 #define BUG_GET_TAINT(bug)	((bug)->flags >> 8)
 #endif
@@ -55,6 +57,18 @@ struct bug_entry {
 #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
 #endif
 
+#ifdef __WARN_FLAGS
+#define __WARN_TAINT(taint)		__WARN_FLAGS(BUGFLAG_TAINT(taint))
+#define __WARN_ONCE_TAINT(taint)	__WARN_FLAGS(BUGFLAG_ONCE|BUGFLAG_TAINT(taint))
+
+#define WARN_ON_ONCE(condition) ({				\
+	int __ret_warn_on = !!(condition);			\
+	if (unlikely(__ret_warn_on))				\
+		__WARN_ONCE_TAINT(TAINT_WARN);			\
+	unlikely(__ret_warn_on);				\
+})
+#endif
+
 /*
  * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
  * significant issues that need prompt attention if they should ever
@@ -97,7 +111,7 @@ void __warn(const char *file, int line,
 #endif
 
 #ifndef WARN
-#define WARN(condition, format...) ({						\
+#define WARN(condition, format...) ({					\
 	int __ret_warn_on = !!(condition);				\
 	if (unlikely(__ret_warn_on))					\
 		__WARN_printf(format);					\
@@ -112,6 +126,7 @@ void __warn(const char *file, int line,
 	unlikely(__ret_warn_on);					\
 })
 
+#ifndef WARN_ON_ONCE
 #define WARN_ON_ONCE(condition)	({				\
 	static bool __section(.data.unlikely) __warned;		\
 	int __ret_warn_once = !!(condition);			\
@@ -122,6 +137,7 @@ void __warn(const char *file, int line,
 	}							\
 	unlikely(__ret_warn_once);				\
 })
+#endif
 
 #define WARN_ONCE(condition, format...)	({			\
 	static bool __section(.data.unlikely) __warned;		\
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -286,8 +286,6 @@
 		*(.rodata1)						\
 	}								\
 									\
-	BUG_TABLE							\
-									\
 	/* PCI quirks */						\
 	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__start_pci_fixups_early) = .;		\
@@ -855,6 +853,7 @@
 		READ_MOSTLY_DATA(cacheline)				\
 		DATA_DATA						\
 		CONSTRUCTORS						\
+		BUG_TABLE						\
 	}
 
 #define INIT_TEXT_SECTION(inittext_align)				\
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -105,7 +105,7 @@ static inline int is_warning_bug(const s
 	return bug->flags & BUGFLAG_WARNING;
 }
 
-const struct bug_entry *find_bug(unsigned long bugaddr);
+struct bug_entry *find_bug(unsigned long bugaddr);
 
 enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs);
 
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -47,7 +47,7 @@
 #include <linux/sched.h>
 #include <linux/rculist.h>
 
-extern const struct bug_entry __start___bug_table[], __stop___bug_table[];
+extern struct bug_entry __start___bug_table[], __stop___bug_table[];
 
 static inline unsigned long bug_addr(const struct bug_entry *bug)
 {
@@ -62,10 +62,10 @@ static inline unsigned long bug_addr(con
 /* Updates are protected by module mutex */
 static LIST_HEAD(module_bug_list);
 
-static const struct bug_entry *module_find_bug(unsigned long bugaddr)
+static struct bug_entry *module_find_bug(unsigned long bugaddr)
 {
 	struct module *mod;
-	const struct bug_entry *bug = NULL;
+	struct bug_entry *bug = NULL;
 
 	rcu_read_lock_sched();
 	list_for_each_entry_rcu(mod, &module_bug_list, bug_list) {
@@ -122,15 +122,15 @@ void module_bug_cleanup(struct module *m
 
 #else
 
-static inline const struct bug_entry *module_find_bug(unsigned long bugaddr)
+static inline struct bug_entry *module_find_bug(unsigned long bugaddr)
 {
 	return NULL;
 }
 #endif
 
-const struct bug_entry *find_bug(unsigned long bugaddr)
+struct bug_entry *find_bug(unsigned long bugaddr)
 {
-	const struct bug_entry *bug;
+	struct bug_entry *bug;
 
 	for (bug = __start___bug_table; bug < __stop___bug_table; ++bug)
 		if (bugaddr == bug_addr(bug))
@@ -141,9 +141,9 @@ const struct bug_entry *find_bug(unsigne
 
 enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 {
-	const struct bug_entry *bug;
+	struct bug_entry *bug;
 	const char *file;
-	unsigned line, warning;
+	unsigned line, warning, once, done;
 
 	if (!is_valid_bugaddr(bugaddr))
 		return BUG_TRAP_TYPE_NONE;
@@ -164,6 +164,18 @@ enum bug_trap_type report_bug(unsigned l
 		line = bug->line;
 #endif
 		warning = (bug->flags & BUGFLAG_WARNING) != 0;
+		once = (bug->flags & BUGFLAG_ONCE) != 0;
+		done = (bug->flags & BUGFLAG_DONE) != 0;
+
+		if (warning && once) {
+			if (done)
+				return BUG_TRAP_TYPE_WARN;
+
+			/*
+			 * Since this is the only store, concurrency is not an issue.
+			 */
+			bug->flags |= BUGFLAG_DONE;
+		}
 	}
 
 	if (warning) {

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [RFC][PATCH] bug: Add _ONCE logic to report_bug()
  2017-02-25  8:19   ` [RFC][PATCH] bug: Add _ONCE logic to report_bug() Peter Zijlstra
@ 2017-02-25  9:18     ` Ingo Molnar
  2017-02-25  9:44       ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2017-02-25  9:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel, torvalds, arjan,
	bp, jpoimboe, richard.weinberger


* Peter Zijlstra <peterz@infradead.org> wrote:

> 
> Josh suggested moving the _ONCE logic inside the trap handler, using a
> bit in the bug_entry::flags field, avoiding the need for the extra
> variable.

This looks interesting, as the _ONCE() methods of warning are far more 
user-friendly than WARN() spam.

> Sadly this only works for WARN_ON_ONCE(), since the others have
> printk() statements prior to triggering the trap.

Which one is problematic to convert, WARN_ONCE()?

> Still, this saves some text and data:
> 
>   text            data     bss    dec              hex    filename
>   10469505        4443448  843776 15756729         f06db9 defconfig-build/vmlinux-ud0
>   10452803        4442616  843776 15739195         f0293b defconfig-build/vmlinux-ud0-once
> 
> (Only compile tested on x86_64 so far.)

That looks pretty sweet, as various almost never triggered _ONCE() checks tend to 
disturb the generated machine code quite a bit ...

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [RFC][PATCH] bug: Add _ONCE logic to report_bug()
  2017-02-25  9:18     ` Ingo Molnar
@ 2017-02-25  9:44       ` Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2017-02-25  9:44 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel, torvalds, arjan,
	bp, jpoimboe, richard.weinberger

On Sat, Feb 25, 2017 at 10:18:23AM +0100, Ingo Molnar wrote:

> > Sadly this only works for WARN_ON_ONCE(), since the others have
> > printk() statements prior to triggering the trap.
> 
> Which one is problematic to convert, WARN_ONCE()?

Yes, WARN_ONCE(), all the ones that have printf fmt crud in.

If we want report_bug() to do the _ONCE thing, we also need that to do
the printk().

I tried the below hackery (beware eye and brain damage ahead) which
actually compiles, but generates horrific junk -- far larger than
without.

The __builtin_va_*() crud was really not meant for things like this.

---
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -66,6 +66,11 @@ do {								\
 
 #define __WARN_FLAGS(flags)	_BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
 
+#define __WARN_ARGS_FLAGS(args, flags) do {		\
+	asm volatile ("" : : "a" (args));		\
+	_BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags));	\
+} while (0)
+
 #include <asm-generic/bug.h>
 
 #endif /* _ASM_X86_BUG_H */
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -7,6 +7,7 @@
 #define BUGFLAG_WARNING		(1 << 0)
 #define BUGFLAG_ONCE		(1 << 1)
 #define BUGFLAG_DONE		(1 << 2)
+#define BUGFLAG_ARGS		(1 << 3)
 #define BUGFLAG_TAINT(taint)	(BUGFLAG_WARNING | ((taint) << 8))
 #define BUG_GET_TAINT(bug)	((bug)->flags >> 8)
 #endif
@@ -67,8 +68,28 @@ struct bug_entry {
 		__WARN_ONCE_TAINT(TAINT_WARN);			\
 	unlikely(__ret_warn_on);				\
 })
+
+#ifdef __WARN_ARGS_FLAGS
+static inline void __va_hack(va_list *ap, ...)
+{
+	va_start(*ap, ap);
+}
+
+#define __WARN_printf(arg...) do {					\
+	va_list __ap;							\
+	__va_hack(&__ap, arg);						\
+	__WARN_ARGS_FLAGS(&__ap, BUGFLAG_ARGS|BUGFLAG_TAINT(TAINT_WARN));	\
+} while (0)
+
+#define __WARN_printf_taint(taint, arg...) do {				\
+	va_list __ap;							\
+	__va_hack(&__ap, arg);						\
+	__WARN_ARGS_FLAGS(&__ap, BUGFLAG_ARGS|BUGFLAG_TAINT(taint));	\
+} while (0)
 #endif
 
+#endif /* __WARN_FLAGS */
+
 /*
  * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
  * significant issues that need prompt attention if they should ever
@@ -90,10 +111,12 @@ extern void warn_slowpath_null(const cha
 	warn_slowpath_fmt_taint(__FILE__, __LINE__, taint, arg)
 #else
 #define __WARN()		__WARN_TAINT(TAINT_WARN)
+#ifndef __WARN_ARGS_FLAGS
 #define __WARN_printf(arg...)	do { printk(arg); __WARN(); } while (0)
 #define __WARN_printf_taint(taint, arg...)				\
 	do { printk(arg); __WARN_TAINT(taint); } while (0)
 #endif
+#endif
 
 /* used internally by panic.c */
 struct warn_args;
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -143,7 +143,8 @@ enum bug_trap_type report_bug(unsigned l
 {
 	struct bug_entry *bug;
 	const char *file;
-	unsigned line, warning, once, done;
+	unsigned line, warning, once, done, args;
+	va_list *ap;
 
 	if (!is_valid_bugaddr(bugaddr))
 		return BUG_TRAP_TYPE_NONE;
@@ -166,6 +167,7 @@ enum bug_trap_type report_bug(unsigned l
 		warning = (bug->flags & BUGFLAG_WARNING) != 0;
 		once = (bug->flags & BUGFLAG_ONCE) != 0;
 		done = (bug->flags & BUGFLAG_DONE) != 0;
+		args = (bug->flags & BUGFLAG_ARGS) != 0;
 
 		if (warning && once) {
 			if (done)
@@ -176,9 +178,16 @@ enum bug_trap_type report_bug(unsigned l
 			 */
 			bug->flags |= BUGFLAG_DONE;
 		}
+
+		if (args)
+			ap = (va_list *)regs->ax;
 	}
 
 	if (warning) {
+		if (args) {
+			const char *fmt = va_arg(*ap, const char *);
+			vprintk(fmt, *ap);
+		}
 		/* this is a WARN_ON rather than BUG/BUG_ON */
 		__warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs,
 		       NULL);

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-24 10:41               ` Peter Zijlstra
@ 2017-02-25 10:38                 ` Borislav Petkov
  2017-02-25 17:55                   ` hpa
  0 siblings, 1 reply; 29+ messages in thread
From: Borislav Petkov @ 2017-02-25 10:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: H. Peter Anvin, Ingo Molnar, Arjan van de Ven, Thomas Gleixner,
	linux-kernel, torvalds, jpoimboe, richard.weinberger,
	Andrew Morton

On Fri, Feb 24, 2017 at 11:41:33AM +0100, Peter Zijlstra wrote:
> So yes, its tricky but it could be done. A new single byte #UD
> instruction would be much nicer though.

Btw, if we did a new insn which means new functionality instead of
"stealing" an invalid one, we would have to have a fallback for all
those current CPUs which don't support it, which means, alternatives
patching.

Perhaps it would be better to take one of the invalid ones and future
hw can then extend it and actually make it into a special OS-INT
instruction which is small enough to be inline and can, if hit, run a
handler where you do fixup.

And then that insn could even have a immed8 arg which you can use to
pass info from the call site. IOW, something like

	...
	OSINT $12
	...

and handler inspects opcode and does things based on it...

Oh well.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-25 10:38                 ` Borislav Petkov
@ 2017-02-25 17:55                   ` hpa
  2017-02-25 19:38                     ` Borislav Petkov
  0 siblings, 1 reply; 29+ messages in thread
From: hpa @ 2017-02-25 17:55 UTC (permalink / raw)
  To: Borislav Petkov, Peter Zijlstra
  Cc: Ingo Molnar, Arjan van de Ven, Thomas Gleixner, linux-kernel,
	torvalds, jpoimboe, richard.weinberger, Andrew Morton

On February 25, 2017 2:38:08 AM PST, Borislav Petkov <bp@alien8.de> wrote:
>On Fri, Feb 24, 2017 at 11:41:33AM +0100, Peter Zijlstra wrote:
>> So yes, its tricky but it could be done. A new single byte #UD
>> instruction would be much nicer though.
>
>Btw, if we did a new insn which means new functionality instead of
>"stealing" an invalid one, we would have to have a fallback for all
>those current CPUs which don't support it, which means, alternatives
>patching.
>
>Perhaps it would be better to take one of the invalid ones and future
>hw can then extend it and actually make it into a special OS-INT
>instruction which is small enough to be inline and can, if hit, run a
>handler where you do fixup.
>
>And then that insn could even have a immed8 arg which you can use to
>pass info from the call site. IOW, something like
>
>	...
>	OSINT $12
>	...
>
>and handler inspects opcode and does things based on it...
>
>Oh well.

You mean like the INT instruction?
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-25 17:55                   ` hpa
@ 2017-02-25 19:38                     ` Borislav Petkov
  2017-02-25 20:04                       ` hpa
  0 siblings, 1 reply; 29+ messages in thread
From: Borislav Petkov @ 2017-02-25 19:38 UTC (permalink / raw)
  To: hpa
  Cc: Peter Zijlstra, Ingo Molnar, Arjan van de Ven, Thomas Gleixner,
	linux-kernel, torvalds, jpoimboe, richard.weinberger,
	Andrew Morton

On Sat, Feb 25, 2017 at 09:55:45AM -0800, hpa@zytor.com wrote:
> You mean like the INT instruction?

Right, you mentioned reusing INT $9 upthread.

That doesn't have the additional info in the immed8 - it is the vector
in this case. But that's not really important for our usage.

In any case, the hw does react to it when I do

	"int $9"

so I guess we could look into using that one.

[   93.668930] test: loading out-of-tree module taints kernel.
[   93.674785] Starting test.
[   93.677571] coprocessor segment overrun: 0000 [#1] PREEMPT SMP
[   93.683459] Modules linked in: test(O+) xfs libcrc32c exportfs snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_coded
[   93.683489] CPU: 4 PID: 2136 Comm: insmod Tainted: G           O    4.10.0-rc7+ #7
[   93.683500] Hardware name: MICRO-STAR INTERNATIONAL CO.,LTD MS-7599/870-C45 (MS-7599), BIOS V1.15 03/04/2011
[   93.683503] task: ffff88012f154380 task.stack: ffffc90006cf4000
[   93.683512] RIP: 0010:test_init+0x17/0x20 [test]
[   93.683515] RSP: 0018:ffffc90006cf7cb8 EFLAGS: 00000296
[   93.683518] RAX: 000000000000000e RBX: ffffffffa03ca0c0 RCX: 0000000000000000
[   93.683521] RDX: 000000000000000e RSI: ffffffff802d4e48 RDI: ffffffff802d4e48
[   93.683523] RBP: ffffc90006cf7cb8 R08: 0000000000000000 R09: 0000000000000274
[   93.683525] R10: 0000000000000000 R11: 0000000000000273 R12: ffffffffa03ca000
[   93.683527] R13: 0000000000000000 R14: 0000000000000001 R15: ffffc90006cf7eb0
[   93.683530] FS:  00007f55d1e3e700(0000) GS:ffff880136d00000(0000) knlGS:0000000000000000
[   93.683533] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   93.683535] CR2: 000055be657f51e8 CR3: 0000000131014000 CR4: 00000000000006e0
[   93.683537] Call Trace:
[   93.683548]  do_one_initcall+0x53/0x1a0
[   93.683554]  ? __vunmap+0xaa/0xf0
[   93.683559]  ? kfree+0x151/0x1b0
[   93.683566]  do_init_module+0x5f/0x1d5
[   93.683573]  load_module+0x2026/0x2530
[   93.683578]  ? __symbol_put+0x80/0x80
[   93.683589]  SYSC_finit_module+0xcb/0xd0
[   93.683597]  SyS_finit_module+0xe/0x10
[   93.683602]  entry_SYSCALL_64_fastpath+0x18/0xa8
[   93.683605] RIP: 0033:0x7f55d1970a59
[   93.683607] RSP: 002b:00007ffc3cf781d8 EFLAGS: 00000206 ORIG_RAX: 0000000000000139
[   93.683610] RAX: ffffffffffffffda RBX: 00007f55d1c2ab78 RCX: 00007f55d1970a59
[   93.683612] RDX: 0000000000000000 RSI: 000055be655d73d9 RDI: 0000000000000003
[   93.683614] RBP: 000000000000270f R08: 0000000000000000 R09: 00007f55d1c2cf00
[   93.683616] R10: 0000000000000003 R11: 0000000000000206 R12: 00007f55d1c2ab78
[   93.683618] R13: 0000000000001020 R14: 00007f55d1c2ab78 R15: 00007f55d1c2ab20
[   93.683623] Code: <31> c0 5d c3 0f 1f 44 00 00 0f 1f 44 00 00 55 48 c7 c7 6e a0 3c a0 
[   93.683655] RIP: test_init+0x17/0x20 [test] RSP: ffffc90006cf7cb8
[   93.683699] ---[ end trace e034b65a8bb8cf26 ]---
[   93.683702] Kernel panic - not syncing: Fatal exception
[   93.709252] Kernel Offset: disabled

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-25 19:38                     ` Borislav Petkov
@ 2017-02-25 20:04                       ` hpa
  2017-02-25 20:29                         ` Borislav Petkov
  0 siblings, 1 reply; 29+ messages in thread
From: hpa @ 2017-02-25 20:04 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Peter Zijlstra, Ingo Molnar, Arjan van de Ven, Thomas Gleixner,
	linux-kernel, torvalds, jpoimboe, richard.weinberger,
	Andrew Morton

On February 25, 2017 11:38:44 AM PST, Borislav Petkov <bp@alien8.de> wrote:
>On Sat, Feb 25, 2017 at 09:55:45AM -0800, hpa@zytor.com wrote:
>> You mean like the INT instruction?
>
>Right, you mentioned reusing INT $9 upthread.
>
>That doesn't have the additional info in the immed8 - it is the vector
>in this case. But that's not really important for our usage.
>
>In any case, the hw does react to it when I do
>
>	"int $9"
>
>so I guess we could look into using that one.
>
>[   93.668930] test: loading out-of-tree module taints kernel.
>[   93.674785] Starting test.
>[   93.677571] coprocessor segment overrun: 0000 [#1] PREEMPT SMP
>[   93.683459] Modules linked in: test(O+) xfs libcrc32c exportfs
>snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_coded
>[   93.683489] CPU: 4 PID: 2136 Comm: insmod Tainted: G           O   
>4.10.0-rc7+ #7
>[   93.683500] Hardware name: MICRO-STAR INTERNATIONAL CO.,LTD
>MS-7599/870-C45 (MS-7599), BIOS V1.15 03/04/2011
>[   93.683503] task: ffff88012f154380 task.stack: ffffc90006cf4000
>[   93.683512] RIP: 0010:test_init+0x17/0x20 [test]
>[   93.683515] RSP: 0018:ffffc90006cf7cb8 EFLAGS: 00000296
>[   93.683518] RAX: 000000000000000e RBX: ffffffffa03ca0c0 RCX:
>0000000000000000
>[   93.683521] RDX: 000000000000000e RSI: ffffffff802d4e48 RDI:
>ffffffff802d4e48
>[   93.683523] RBP: ffffc90006cf7cb8 R08: 0000000000000000 R09:
>0000000000000274
>[   93.683525] R10: 0000000000000000 R11: 0000000000000273 R12:
>ffffffffa03ca000
>[   93.683527] R13: 0000000000000000 R14: 0000000000000001 R15:
>ffffc90006cf7eb0
>[   93.683530] FS:  00007f55d1e3e700(0000) GS:ffff880136d00000(0000)
>knlGS:0000000000000000
>[   93.683533] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>[   93.683535] CR2: 000055be657f51e8 CR3: 0000000131014000 CR4:
>00000000000006e0
>[   93.683537] Call Trace:
>[   93.683548]  do_one_initcall+0x53/0x1a0
>[   93.683554]  ? __vunmap+0xaa/0xf0
>[   93.683559]  ? kfree+0x151/0x1b0
>[   93.683566]  do_init_module+0x5f/0x1d5
>[   93.683573]  load_module+0x2026/0x2530
>[   93.683578]  ? __symbol_put+0x80/0x80
>[   93.683589]  SYSC_finit_module+0xcb/0xd0
>[   93.683597]  SyS_finit_module+0xe/0x10
>[   93.683602]  entry_SYSCALL_64_fastpath+0x18/0xa8
>[   93.683605] RIP: 0033:0x7f55d1970a59
>[   93.683607] RSP: 002b:00007ffc3cf781d8 EFLAGS: 00000206 ORIG_RAX:
>0000000000000139
>[   93.683610] RAX: ffffffffffffffda RBX: 00007f55d1c2ab78 RCX:
>00007f55d1970a59
>[   93.683612] RDX: 0000000000000000 RSI: 000055be655d73d9 RDI:
>0000000000000003
>[   93.683614] RBP: 000000000000270f R08: 0000000000000000 R09:
>00007f55d1c2cf00
>[   93.683616] R10: 0000000000000003 R11: 0000000000000206 R12:
>00007f55d1c2ab78
>[   93.683618] R13: 0000000000001020 R14: 00007f55d1c2ab78 R15:
>00007f55d1c2ab20
>[   93.683623] Code: <31> c0 5d c3 0f 1f 44 00 00 0f 1f 44 00 00 55 48
>c7 c7 6e a0 3c a0 
>[   93.683655] RIP: test_init+0x17/0x20 [test] RSP: ffffc90006cf7cb8
>[   93.683699] ---[ end trace e034b65a8bb8cf26 ]---
>[   93.683702] Kernel panic - not syncing: Fatal exception
>[   93.709252] Kernel Offset: disabled

Note that once you have a trap you can create an immediate yourself, the CPU doesn't need to do it for you, unless you really care about latency (reading the instruction steam can be kind of expensive, although it is quite a bit simpler if we know we come from kernel space.)
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] x86: Implement __WARN using UD0
  2017-02-25 20:04                       ` hpa
@ 2017-02-25 20:29                         ` Borislav Petkov
  0 siblings, 0 replies; 29+ messages in thread
From: Borislav Petkov @ 2017-02-25 20:29 UTC (permalink / raw)
  To: hpa
  Cc: Peter Zijlstra, Ingo Molnar, Arjan van de Ven, Thomas Gleixner,
	linux-kernel, torvalds, jpoimboe, richard.weinberger,
	Andrew Morton

On Sat, Feb 25, 2017 at 12:04:18PM -0800, hpa@zytor.com wrote:
> Note that once you have a trap you can create an immediate yourself,
> the CPU doesn't need to do it for you, unless you really care about
> latency (reading the instruction steam can be kind of expensive,
> although it is quite a bit simpler if we know we come from kernel
> space.)

You mean, put it after the INT 9 opcode?

CD 09 immed8

Yeah, that could be one way to pass callsite-specific info, if needed.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2017-02-25 20:43 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-23 13:28 [PATCH] x86: Implement __WARN using UD0 Peter Zijlstra
2017-02-23 14:09 ` Peter Zijlstra
2017-02-23 14:59   ` Peter Zijlstra
2017-02-23 15:09   ` hpa
2017-02-23 15:23     ` Peter Zijlstra
2017-02-23 15:32       ` hpa
2017-02-23 16:03         ` Borislav Petkov
2017-02-23 14:12 ` Josh Poimboeuf
2017-02-23 14:17   ` Borislav Petkov
2017-02-23 14:36   ` Peter Zijlstra
2017-02-23 14:14 ` Arjan van de Ven
2017-02-23 14:57   ` Peter Zijlstra
2017-02-24  7:43     ` Ingo Molnar
2017-02-24  8:31       ` Peter Zijlstra
2017-02-24  9:06         ` hpa
2017-02-24  9:11         ` hpa
2017-02-24  9:15           ` Ingo Molnar
2017-02-24  9:46             ` Borislav Petkov
2017-02-24  9:52               ` H. Peter Anvin
     [not found]             ` <21ac6e53-2fcd-52e9-e72d-9faf7da14d1e@zytor.com>
2017-02-24 10:41               ` Peter Zijlstra
2017-02-25 10:38                 ` Borislav Petkov
2017-02-25 17:55                   ` hpa
2017-02-25 19:38                     ` Borislav Petkov
2017-02-25 20:04                       ` hpa
2017-02-25 20:29                         ` Borislav Petkov
2017-02-24 11:16 ` [PATCH -v2] " Peter Zijlstra
2017-02-25  8:19   ` [RFC][PATCH] bug: Add _ONCE logic to report_bug() Peter Zijlstra
2017-02-25  9:18     ` Ingo Molnar
2017-02-25  9:44       ` Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.