All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org
Cc: peterz@infradead.org, linux-kernel@vger.kernel.org,
	rostedt@goodmis.org, mhiramat@kernel.org, bristot@redhat.com,
	jbaron@akamai.com, torvalds@linux-foundation.org,
	tglx@linutronix.de, mingo@kernel.org, namit@vmware.com,
	hpa@zytor.com, luto@kernel.org, ard.biesheuvel@linaro.org,
	jpoimboe@redhat.com, hjl.tools@gmail.com
Subject: [RFC][PATCH 9/9] jump_label, x86: Enable JMP8/NOP2 support
Date: Mon, 07 Oct 2019 10:44:52 +0200	[thread overview]
Message-ID: <20191007090012.28803430.0@infradead.org> (raw)
In-Reply-To: 20191007084443.79370128.1@infradead.org

Enable and emit short JMP/NOP jump_label entries.

A lot of the jumps are in fact short, like around tracepoints:

0000 0000000000000920 <native_read_msr>:                                   | 0000 0000000000000920 <native_read_msr>:
0000      920:  53                      push   %rbx                        | 0000      920:  53                      push   %rbx
0001      921:  89 f9                   mov    %edi,%ecx                   | 0001      921:  89 f9                   mov    %edi,%ecx
0003      923:  0f 32                   rdmsr                              | 0003      923:  0f 32                   rdmsr
0005      925:  48 c1 e2 20             shl    $0x20,%rdx                  | 0005      925:  48 c1 e2 20             shl    $0x20,%rdx
0009      929:  48 89 d3                mov    %rdx,%rbx                   | 0009      929:  48 89 d3                mov    %rdx,%rbx
000c      92c:  48 09 c3                or     %rax,%rbx                   | 000c      92c:  48 09 c3                or     %rax,%rbx
000f      92f:  0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)            \ 000f      92f:  66 90                   xchg   %ax,%ax
0014      934:  48 89 d8                mov    %rbx,%rax                   \ 0011      931:  48 89 d8                mov    %rbx,%rax
0017      937:  5b                      pop    %rbx                        \ 0014      934:  5b                      pop    %rbx
0018      938:  c3                      retq                               \ 0015      935:  c3                      retq
0019      939:  48 89 de                mov    %rbx,%rsi                   \ 0016      936:  48 89 de                mov    %rbx,%rsi
001c      93c:  31 d2                   xor    %edx,%edx                   \ 0019      939:  31 d2                   xor    %edx,%edx
001e      93e:  e8 00 00 00 00          callq  943 <native_read_msr+0x23>  \ 001b      93b:  e8 00 00 00 00          callq  940 <native_read_msr+0x20>
001f                    93f: R_X86_64_PLT32     do_trace_read_msr-0x4      \ 001c                    93c: R_X86_64_PLT32     do_trace_read_msr-0x4
0023      943:  48 89 d8                mov    %rbx,%rax                   \ 0020      940:  48 89 d8                mov    %rbx,%rax
0026      946:  5b                      pop    %rbx                        \ 0023      943:  5b                      pop    %rbx
0027      947:  c3                      retq                               \ 0024      944:  c3                      retq

.rela__jump_table
  000000000010  000200000002 R_X86_64_PC32     0000000000000000 .text + 92f
  000000000014  000200000002 R_X86_64_PC32     0000000000000000 .text + 939 (or 936)
  000000000018  014500000018 R_X86_64_PC64     0000000000000000 __tracepoint_read_msr + 8

The below patch works as long as the jump doesn't cross sections; the
moment GCC generates a branch crossing sections and feeds it into our
asm-goto, things come apart like:

  /tmp/ccM70dCh.s: Assembler messages:
  /tmp/ccM70dCh.s: Error: invalid operands (.text.unlikely and .text sections) for `-' when setting `disp'
  ../arch/x86/include/asm/jump_label.h:39: Error: invalid operands (.text.unlikely and *ABS* sections) for `>>'
  ../arch/x86/include/asm/jump_label.h:39: Error: invalid operands (.text.unlikely and *ABS* sections) for `>>'

Which is really unfortunate since it is a completely sane thing to
happen. We really need a GAS extension to handle this :-/

All we really need is to detect the two offsets are from different
sections and punt to the 5 byte nop. But AFAICT there is nothing that
can do that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: "H.J. Lu" <hjl.tools@gmail.com>
---
 arch/x86/Kconfig                  |   10 ++++++++++
 arch/x86/include/asm/jump_label.h |   36 +++++++++++++++++++++++++++++++++++-
 arch/x86/kernel/jump_label.c      |   17 +++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -230,6 +230,16 @@ config X86
 	select X86_FEATURE_NAMES		if PROC_FS
 	select PROC_PID_ARCH_STATUS		if PROC_FS
 
+#
+# This mostly depends on the asm ".nops 5" directive existing and emitting a
+# single instruction nop, this is true for x86_64, but not for i386, which
+# violates the single instruction constraint.
+#
+config CC_HAS_ASM_NOPS
+	def_bool y
+	depends on X86_64
+	depends on $(success,echo 'void foo(void) { asm inline (".nops 5"); }' | $(CC) -x c - -c -o /dev/null)
+
 config INSTRUCTION_DECODER
 	def_bool y
 	depends on KPROBES || PERF_EVENTS || UPROBES
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,6 +4,10 @@
 
 #define HAVE_JUMP_LABEL_BATCH
 
+#ifdef CONFIG_CC_HAS_ASM_NOPS
+#define HAVE_JUMP_LABEL_VARIABLE
+#endif
+
 #ifdef CONFIG_X86_64
 # define STATIC_KEY_NOP2 P6_NOP2
 # define STATIC_KEY_NOP5 P6_NOP5_ATOMIC
@@ -31,7 +35,29 @@
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:"
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+		/*
+		 * This comes apart mightily when %[l_yes] and 1b are in
+		 * different sections; like for instance .text and
+		 * .text.unlikely. Sadly there is nothing to actually detect
+		 * and handle this case explicitly.
+		 *
+		 * GAS sucks!!
+		 */
+		".set disp, (%l[l_yes]) - (1b + 2) \n\t"
+		".set res, (disp >> 31) == (disp >> 7) \n\t"
+		".set is_byte, -res \n\t"
+		".set is_long, -(~res) \n\t"
+
+		/*
+		 * This relies on .nops:
+		 *  - matching the above STATIC_KEY_NOP* bytes
+		 *  - emitting a single instruction nop for 2 and 5 bytes.
+		 */
+		".nops (2*is_byte) + (5*is_long)\n\t"
+#else
 		".byte " __stringify(STATIC_KEY_NOP5) "\n\t"
+#endif
 		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
@@ -43,8 +69,13 @@ static __always_inline bool arch_static_
 static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:"
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+		"jmp %l[l_yes] \n\t"
+#else
+		/* Equivalent to "jmp.d32 \target" */
 		".byte 0xe9 \n\t"
 		".long %l[l_yes] - (. + 4) \n\t"
+#endif
 		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
@@ -59,9 +90,12 @@ extern int arch_jump_entry_size(struct j
 
 .macro STATIC_BRANCH_FALSE_LIKELY target, key
 .Lstatic_jump_\@:
-	/* Equivalent to "jmp.d32 \target" */
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+	jmp \target
+#else
 	.byte		0xe9
 	.long		\target - (. + 4)
+#endif
 
 	.pushsection __jump_table, "aw"
 	_ASM_ALIGN
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -18,7 +18,24 @@
 
 int arch_jump_entry_size(struct jump_entry *entry)
 {
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+	struct insn insn;
+
+	/*
+	 * Because the instruction size heuristic doesn't purely rely on
+	 * displacement, but also on section, and we're hindered by GNU as UB
+	 * to emit the assemble time choice, we have to discover the size at
+	 * runtime.
+	 */
+	kernel_insn_init(&insn, (void *)jump_entry_code(entry), MAX_INSN_SIZE);
+	insn_get_length(&insn);
+	BUG_ON(!insn_complete(&insn));
+	BUG_ON(insn.length != 2 && insn.length != 5);
+
+	return insn.length;
+#else
 	return JMP32_INSN_SIZE;
+#endif
 }
 
 struct jump_label_patch {



  parent reply	other threads:[~2019-10-07 11:24 UTC|newest]

Thread overview: 128+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-07  9:02 [RESEND] everything text-poke: ftrace, modules, static_call and jump_label Peter Zijlstra
2019-10-07  8:17 ` [PATCH v3 0/6] Rewrite x86/ftrace to use text_poke() Peter Zijlstra
2019-10-07  8:17   ` [PATCH v3 1/6] x86/alternatives: Teach text_poke_bp() to emulate instructions Peter Zijlstra
2019-10-08 14:29     ` Borislav Petkov
2019-10-08 14:40       ` Steven Rostedt
2019-10-08 14:50         ` Borislav Petkov
2019-10-08 14:48       ` Peter Zijlstra
2019-10-08 14:54         ` Borislav Petkov
2019-10-08 15:04           ` Steven Rostedt
2019-10-08 15:24             ` Borislav Petkov
2019-10-09 12:03     ` Daniel Bristot de Oliveira
2019-10-07  8:17   ` [PATCH v3 2/6] x86/alternatives: Update int3_emulate_push() comment Peter Zijlstra
2019-10-07  8:17   ` [PATCH v3 3/6] x86/alternatives,jump_label: Provide better text_poke() batching interface Peter Zijlstra
2019-10-09 12:04     ` Daniel Bristot de Oliveira
2019-10-07  8:17   ` [PATCH v3 4/6] x86/alternatives: Add and use text_gen_insn() helper Peter Zijlstra
2019-10-08  6:23     ` Masami Hiramatsu
2019-10-08  8:15       ` Peter Zijlstra
2019-10-07  8:17   ` [PATCH v3 5/6] x86/ftrace: Use text_poke() Peter Zijlstra
2019-10-08 14:43     ` Steven Rostedt
2019-10-08 17:11       ` Peter Zijlstra
2019-10-08 17:27         ` Steven Rostedt
2019-10-10  2:41       ` Steven Rostedt
2019-10-10  9:20         ` Peter Zijlstra
2019-10-10 13:19           ` Steven Rostedt
2019-10-10 14:05             ` Peter Zijlstra
2019-10-10 15:54               ` Steven Rostedt
2019-10-10 17:28                 ` Peter Zijlstra
2019-10-10 17:48                   ` Steven Rostedt
2019-10-11 10:45                     ` Peter Zijlstra
2019-10-11 10:47                       ` Peter Zijlstra
2019-10-11 10:50                         ` Peter Zijlstra
2019-10-11 12:59                   ` Peter Zijlstra
2019-10-11 13:33                     ` Steven Rostedt
2019-10-11 13:45                       ` Peter Zijlstra
2019-10-15 13:07                     ` Jessica Yu
2019-10-15 13:56                       ` Peter Zijlstra
2019-10-15 14:11                         ` Peter Zijlstra
2019-10-15 14:13                         ` Miroslav Benes
2019-10-15 15:06                           ` Joe Lawrence
2019-10-15 15:31                             ` Jessica Yu
2019-10-15 22:17                               ` Joe Lawrence
2019-10-15 22:27                                 ` Steven Rostedt
2019-10-16  7:42                                   ` Peter Zijlstra
2019-10-16 10:15                                     ` Miroslav Benes
2019-10-21 15:05                                     ` Josh Poimboeuf
2020-01-20 16:50                                       ` Josh Poimboeuf
2020-01-21  8:35                                         ` Miroslav Benes
2020-01-21 16:10                                           ` Josh Poimboeuf
2020-01-22 10:09                                             ` Miroslav Benes
2020-01-22 21:42                                               ` Josh Poimboeuf
2020-01-28  9:28                                                 ` Miroslav Benes
2020-01-28 15:00                                                   ` Josh Poimboeuf
2020-01-28 15:40                                                     ` Petr Mladek
2020-01-28 17:02                                                       ` Josh Poimboeuf
2020-01-29  0:46                                                         ` Jiri Kosina
2020-01-29  2:17                                                           ` Josh Poimboeuf
2020-01-29  3:14                                                             ` Jiri Kosina
2020-01-29 12:28                                                         ` Miroslav Benes
2020-01-29 15:59                                                           ` Josh Poimboeuf
2020-01-30  9:53                                                             ` Petr Mladek
2020-01-30 14:17                                                               ` Josh Poimboeuf
2020-01-31  7:17                                                                 ` Petr Mladek
2020-01-22 12:15                                             ` Miroslav Benes
2020-01-22 15:05                                               ` Miroslav Benes
2020-01-22 22:03                                                 ` Josh Poimboeuf
2020-01-23 10:19                                                   ` Martin Jambor
2019-10-16  7:49                                   ` Peter Zijlstra
2019-10-16 10:20                                     ` Miroslav Benes
2019-10-16 13:29                                       ` Miroslav Benes
2019-10-18 13:03                                         ` Jessica Yu
2019-10-18 13:40                                           ` Petr Mladek
2019-10-21 14:14                                             ` Jessica Yu
2019-10-21 15:31                                             ` Josh Poimboeuf
2019-10-22  8:27                                           ` Miroslav Benes
2019-10-22 14:31                                             ` Josh Poimboeuf
2019-10-23  9:04                                               ` Miroslav Benes
2019-10-16  6:51                             ` Miroslav Benes
2019-10-16  9:23                               ` Peter Zijlstra
2019-10-16  9:36                                 ` Jessica Yu
2019-10-16  9:51                                   ` Peter Zijlstra
2019-10-16 12:39                               ` Peter Zijlstra
2019-10-22  8:45                                 ` Miroslav Benes
2019-10-15 14:42                         ` Peter Zijlstra
2019-10-15 18:31                           ` Peter Zijlstra
2019-10-15 15:51                         ` Jessica Yu
2019-10-15 13:28                     ` Steven Rostedt
2019-10-15 13:42                       ` Peter Zijlstra
2019-10-15 16:09                       ` Jessica Yu
2019-10-07  8:17   ` [PATCH v3 6/6] x86/mm: Remove set_kernel_text_r[ow]() Peter Zijlstra
2019-10-08 15:07   ` [PATCH v3 0/6] Rewrite x86/ftrace to use text_poke() Steven Rostedt
2019-10-07  8:25 ` [PATCH v2 0/4] Propagate module notifier errors Peter Zijlstra
2019-10-07  8:25   ` [PATCH v2 1/4] notifier: Fix broken error handling pattern Peter Zijlstra
2019-10-10 22:01     ` Rafael J. Wysocki
2019-10-07  8:25   ` [PATCH v2 2/4] module: Fix up module_notifier return values Peter Zijlstra
2019-10-23 19:25     ` Steven Rostedt
2019-10-07  8:25   ` [PATCH v2 3/4] module: Properly propagate MODULE_STATE_COMING failure Peter Zijlstra
2019-10-08 13:08     ` Miroslav Benes
2019-10-07  8:25   ` [PATCH v2 4/4] jump_label,module: Fix module lifetime for __jump_label_mod_text_reserved Peter Zijlstra
2019-10-23 19:29     ` Steven Rostedt
2019-10-07  8:27 ` [PATCH v2 00/13] Add static_call() Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 01/13] compiler.h: Make __ADDRESSABLE() symbol truly unique Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 02/13] static_call: Add basic static call infrastructure Peter Zijlstra
2019-10-07 11:33     ` Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 03/13] static_call: Add inline " Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 04/13] static_call: Avoid kprobes on inline static_call()s Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 05/13] x86/static_call: Add out-of-line static call implementation Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 06/13] x86/static_call: Add inline static call implementation for x86-64 Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 07/13] static_call: Simple self-test Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 08/13] tracepoints: Use static_call Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 09/13] x86/alternatives: Teach text_poke_bp() to emulate RET Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 10/13] static_call: Add static_cond_call() Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 11/13] static_call: Handle tail-calls Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 12/13] static_call: Allow early init Peter Zijlstra
2019-10-07  8:27   ` [RFC][PATCH v2 13/13] x86/perf, static_call: Optimize x86_pmu methods Peter Zijlstra
2019-10-07 11:33   ` [PATCH v2 00/13] Add static_call() Peter Zijlstra
2019-10-07  8:44 ` [RFC][PATCH 0/9] Variable size jump_label support Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 1/9] jump_label, x86: Strip ASM " Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 2/9] jump_label, x86: Factor out the __jump_table generation Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 3/9] jump_label, x86: Remove init NOP optimization Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 4/9] jump_label, x86: Improve error when we fail expected text Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 5/9] jump_label, x86: Introduce jump_entry_size() Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 6/9] jump_label, x86: Add variable length patching support Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 7/9] jump_label,objtool: Validate variable size jump labels Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 8/9] jump_label,objtool: Generate possible statistics Peter Zijlstra
2019-10-07  8:44   ` Peter Zijlstra [this message]
2019-10-07 12:07   ` [RFC][PATCH 0/9] Variable size jump_label support Peter Zijlstra
2019-10-07 12:55     ` Ingo Molnar
2019-10-07 15:08       ` Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191007090012.28803430.0@infradead.org \
    --to=peterz@infradead.org \
    --cc=ard.biesheuvel@linaro.org \
    --cc=bristot@redhat.com \
    --cc=hjl.tools@gmail.com \
    --cc=hpa@zytor.com \
    --cc=jbaron@akamai.com \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=namit@vmware.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.