linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org
Cc: peterz@infradead.org, linux-kernel@vger.kernel.org,
	rostedt@goodmis.org, mhiramat@kernel.org, bristot@redhat.com,
	jbaron@akamai.com, torvalds@linux-foundation.org,
	tglx@linutronix.de, mingo@kernel.org, namit@vmware.com,
	hpa@zytor.com, luto@kernel.org, ard.biesheuvel@linaro.org,
	jpoimboe@redhat.com, hjl.tools@gmail.com
Subject: [RFC][PATCH 9/9] jump_label, x86: Enable JMP8/NOP2 support
Date: Mon, 07 Oct 2019 10:44:52 +0200	[thread overview]
Message-ID: <20191007090012.28803430.0@infradead.org> (raw)
In-Reply-To: 20191007084443.79370128.1@infradead.org

Enable and emit short JMP/NOP jump_label entries.

A lot of the jumps are in fact short, like around tracepoints:

0000 0000000000000920 <native_read_msr>:                                   | 0000 0000000000000920 <native_read_msr>:
0000      920:  53                      push   %rbx                        | 0000      920:  53                      push   %rbx
0001      921:  89 f9                   mov    %edi,%ecx                   | 0001      921:  89 f9                   mov    %edi,%ecx
0003      923:  0f 32                   rdmsr                              | 0003      923:  0f 32                   rdmsr
0005      925:  48 c1 e2 20             shl    $0x20,%rdx                  | 0005      925:  48 c1 e2 20             shl    $0x20,%rdx
0009      929:  48 89 d3                mov    %rdx,%rbx                   | 0009      929:  48 89 d3                mov    %rdx,%rbx
000c      92c:  48 09 c3                or     %rax,%rbx                   | 000c      92c:  48 09 c3                or     %rax,%rbx
000f      92f:  0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)            \ 000f      92f:  66 90                   xchg   %ax,%ax
0014      934:  48 89 d8                mov    %rbx,%rax                   \ 0011      931:  48 89 d8                mov    %rbx,%rax
0017      937:  5b                      pop    %rbx                        \ 0014      934:  5b                      pop    %rbx
0018      938:  c3                      retq                               \ 0015      935:  c3                      retq
0019      939:  48 89 de                mov    %rbx,%rsi                   \ 0016      936:  48 89 de                mov    %rbx,%rsi
001c      93c:  31 d2                   xor    %edx,%edx                   \ 0019      939:  31 d2                   xor    %edx,%edx
001e      93e:  e8 00 00 00 00          callq  943 <native_read_msr+0x23>  \ 001b      93b:  e8 00 00 00 00          callq  940 <native_read_msr+0x20>
001f                    93f: R_X86_64_PLT32     do_trace_read_msr-0x4      \ 001c                    93c: R_X86_64_PLT32     do_trace_read_msr-0x4
0023      943:  48 89 d8                mov    %rbx,%rax                   \ 0020      940:  48 89 d8                mov    %rbx,%rax
0026      946:  5b                      pop    %rbx                        \ 0023      943:  5b                      pop    %rbx
0027      947:  c3                      retq                               \ 0024      944:  c3                      retq

.rela__jump_table
  000000000010  000200000002 R_X86_64_PC32     0000000000000000 .text + 92f
  000000000014  000200000002 R_X86_64_PC32     0000000000000000 .text + 939 (or 936)
  000000000018  014500000018 R_X86_64_PC64     0000000000000000 __tracepoint_read_msr + 8

The below patch works as long as the jump doesn't cross sections; the
moment GCC generates a branch crossing sections and feeds it into our
asm-goto, things come apart like:

  /tmp/ccM70dCh.s: Assembler messages:
  /tmp/ccM70dCh.s: Error: invalid operands (.text.unlikely and .text sections) for `-' when setting `disp'
  ../arch/x86/include/asm/jump_label.h:39: Error: invalid operands (.text.unlikely and *ABS* sections) for `>>'
  ../arch/x86/include/asm/jump_label.h:39: Error: invalid operands (.text.unlikely and *ABS* sections) for `>>'

Which is really unfortunate since it is a completely sane thing to
happen. We really need a GAS extension to handle this :-/

All we really need is to detect that the two offsets are from different
sections and punt to the 5 byte nop. But AFAICT there is nothing that
can do that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: "H.J. Lu" <hjl.tools@gmail.com>
---
 arch/x86/Kconfig                  |   10 ++++++++++
 arch/x86/include/asm/jump_label.h |   36 +++++++++++++++++++++++++++++++++++-
 arch/x86/kernel/jump_label.c      |   17 +++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -230,6 +230,16 @@ config X86
 	select X86_FEATURE_NAMES		if PROC_FS
 	select PROC_PID_ARCH_STATUS		if PROC_FS
 
+#
+# This mostly depends on the asm ".nops 5" directive existing and emitting a
+# single-instruction nop. This is true for x86_64, but not for i386, which
+# violates the single-instruction constraint.
+#
+config CC_HAS_ASM_NOPS
+	def_bool y
+	depends on X86_64
+	depends on $(success,echo 'void foo(void) { asm inline (".nops 5"); }' | $(CC) -x c - -c -o /dev/null)
+
 config INSTRUCTION_DECODER
 	def_bool y
 	depends on KPROBES || PERF_EVENTS || UPROBES
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,6 +4,10 @@
 
 #define HAVE_JUMP_LABEL_BATCH
 
+#ifdef CONFIG_CC_HAS_ASM_NOPS
+#define HAVE_JUMP_LABEL_VARIABLE
+#endif
+
 #ifdef CONFIG_X86_64
 # define STATIC_KEY_NOP2 P6_NOP2
 # define STATIC_KEY_NOP5 P6_NOP5_ATOMIC
@@ -31,7 +35,29 @@
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:"
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+		/*
+		 * This comes apart mightily when %[l_yes] and 1b are in
+		 * different sections; like for instance .text and
+		 * .text.unlikely. Sadly there is nothing to actually detect
+		 * and handle this case explicitly.
+		 *
+		 * GAS sucks!!
+		 */
+		".set disp, (%l[l_yes]) - (1b + 2) \n\t"
+		".set res, (disp >> 31) == (disp >> 7) \n\t"
+		".set is_byte, -res \n\t"
+		".set is_long, -(~res) \n\t"
+
+		/*
+		 * This relies on .nops:
+		 *  - matching the above STATIC_KEY_NOP* bytes
+		 *  - emitting a single instruction nop for 2 and 5 bytes.
+		 */
+		".nops (2*is_byte) + (5*is_long)\n\t"
+#else
 		".byte " __stringify(STATIC_KEY_NOP5) "\n\t"
+#endif
 		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
@@ -43,8 +69,13 @@ static __always_inline bool arch_static_
 static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:"
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+		"jmp %l[l_yes] \n\t"
+#else
+		/* Equivalent to "jmp.d32 \target" */
 		".byte 0xe9 \n\t"
 		".long %l[l_yes] - (. + 4) \n\t"
+#endif
 		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
@@ -59,9 +90,12 @@ extern int arch_jump_entry_size(struct j
 
 .macro STATIC_BRANCH_FALSE_LIKELY target, key
 .Lstatic_jump_\@:
-	/* Equivalent to "jmp.d32 \target" */
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+	jmp \target
+#else
 	.byte		0xe9
 	.long		\target - (. + 4)
+#endif
 
 	.pushsection __jump_table, "aw"
 	_ASM_ALIGN
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -18,7 +18,24 @@
 
 int arch_jump_entry_size(struct jump_entry *entry)
 {
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+	struct insn insn;
+
+	/*
+	 * Because the instruction size heuristic doesn't purely rely on
+	 * displacement, but also on section, and we're hindered by GNU as UB
+	 * to emit the assemble time choice, we have to discover the size at
+	 * runtime.
+	 */
+	kernel_insn_init(&insn, (void *)jump_entry_code(entry), MAX_INSN_SIZE);
+	insn_get_length(&insn);
+	BUG_ON(!insn_complete(&insn));
+	BUG_ON(insn.length != 2 && insn.length != 5);
+
+	return insn.length;
+#else
 	return JMP32_INSN_SIZE;
+#endif
 }
 
 struct jump_label_patch {



  parent reply	other threads:[~2019-10-07 11:24 UTC|newest]

Thread overview: 128+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-07  9:02 [RESEND] everything text-poke: ftrace, modules, static_call and jump_label Peter Zijlstra
2019-10-07  8:17 ` [PATCH v3 0/6] Rewrite x86/ftrace to use text_poke() Peter Zijlstra
2019-10-07  8:17   ` [PATCH v3 1/6] x86/alternatives: Teach text_poke_bp() to emulate instructions Peter Zijlstra
2019-10-08 14:29     ` Borislav Petkov
2019-10-08 14:40       ` Steven Rostedt
2019-10-08 14:50         ` Borislav Petkov
2019-10-08 14:48       ` Peter Zijlstra
2019-10-08 14:54         ` Borislav Petkov
2019-10-08 15:04           ` Steven Rostedt
2019-10-08 15:24             ` Borislav Petkov
2019-10-09 12:03     ` Daniel Bristot de Oliveira
2019-10-07  8:17   ` [PATCH v3 2/6] x86/alternatives: Update int3_emulate_push() comment Peter Zijlstra
2019-10-07  8:17   ` [PATCH v3 3/6] x86/alternatives,jump_label: Provide better text_poke() batching interface Peter Zijlstra
2019-10-09 12:04     ` Daniel Bristot de Oliveira
2019-10-07  8:17   ` [PATCH v3 4/6] x86/alternatives: Add and use text_gen_insn() helper Peter Zijlstra
2019-10-08  6:23     ` Masami Hiramatsu
2019-10-08  8:15       ` Peter Zijlstra
2019-10-07  8:17   ` [PATCH v3 5/6] x86/ftrace: Use text_poke() Peter Zijlstra
2019-10-08 14:43     ` Steven Rostedt
2019-10-08 17:11       ` Peter Zijlstra
2019-10-08 17:27         ` Steven Rostedt
2019-10-10  2:41       ` Steven Rostedt
2019-10-10  9:20         ` Peter Zijlstra
2019-10-10 13:19           ` Steven Rostedt
2019-10-10 14:05             ` Peter Zijlstra
2019-10-10 15:54               ` Steven Rostedt
2019-10-10 17:28                 ` Peter Zijlstra
2019-10-10 17:48                   ` Steven Rostedt
2019-10-11 10:45                     ` Peter Zijlstra
2019-10-11 10:47                       ` Peter Zijlstra
2019-10-11 10:50                         ` Peter Zijlstra
2019-10-11 12:59                   ` Peter Zijlstra
2019-10-11 13:33                     ` Steven Rostedt
2019-10-11 13:45                       ` Peter Zijlstra
2019-10-15 13:07                     ` Jessica Yu
2019-10-15 13:56                       ` Peter Zijlstra
2019-10-15 14:11                         ` Peter Zijlstra
2019-10-15 14:13                         ` Miroslav Benes
2019-10-15 15:06                           ` Joe Lawrence
2019-10-15 15:31                             ` Jessica Yu
2019-10-15 22:17                               ` Joe Lawrence
2019-10-15 22:27                                 ` Steven Rostedt
2019-10-16  7:42                                   ` Peter Zijlstra
2019-10-16 10:15                                     ` Miroslav Benes
2019-10-21 15:05                                     ` Josh Poimboeuf
2020-01-20 16:50                                       ` Josh Poimboeuf
2020-01-21  8:35                                         ` Miroslav Benes
2020-01-21 16:10                                           ` Josh Poimboeuf
2020-01-22 10:09                                             ` Miroslav Benes
2020-01-22 21:42                                               ` Josh Poimboeuf
2020-01-28  9:28                                                 ` Miroslav Benes
2020-01-28 15:00                                                   ` Josh Poimboeuf
2020-01-28 15:40                                                     ` Petr Mladek
2020-01-28 17:02                                                       ` Josh Poimboeuf
2020-01-29  0:46                                                         ` Jiri Kosina
2020-01-29  2:17                                                           ` Josh Poimboeuf
2020-01-29  3:14                                                             ` Jiri Kosina
2020-01-29 12:28                                                         ` Miroslav Benes
2020-01-29 15:59                                                           ` Josh Poimboeuf
2020-01-30  9:53                                                             ` Petr Mladek
2020-01-30 14:17                                                               ` Josh Poimboeuf
2020-01-31  7:17                                                                 ` Petr Mladek
2020-01-22 12:15                                             ` Miroslav Benes
2020-01-22 15:05                                               ` Miroslav Benes
2020-01-22 22:03                                                 ` Josh Poimboeuf
2020-01-23 10:19                                                   ` Martin Jambor
2019-10-16  7:49                                   ` Peter Zijlstra
2019-10-16 10:20                                     ` Miroslav Benes
2019-10-16 13:29                                       ` Miroslav Benes
2019-10-18 13:03                                         ` Jessica Yu
2019-10-18 13:40                                           ` Petr Mladek
2019-10-21 14:14                                             ` Jessica Yu
2019-10-21 15:31                                             ` Josh Poimboeuf
2019-10-22  8:27                                           ` Miroslav Benes
2019-10-22 14:31                                             ` Josh Poimboeuf
2019-10-23  9:04                                               ` Miroslav Benes
2019-10-16  6:51                             ` Miroslav Benes
2019-10-16  9:23                               ` Peter Zijlstra
2019-10-16  9:36                                 ` Jessica Yu
2019-10-16  9:51                                   ` Peter Zijlstra
2019-10-16 12:39                               ` Peter Zijlstra
2019-10-22  8:45                                 ` Miroslav Benes
2019-10-15 14:42                         ` Peter Zijlstra
2019-10-15 18:31                           ` Peter Zijlstra
2019-10-15 15:51                         ` Jessica Yu
2019-10-15 13:28                     ` Steven Rostedt
2019-10-15 13:42                       ` Peter Zijlstra
2019-10-15 16:09                       ` Jessica Yu
2019-10-07  8:17   ` [PATCH v3 6/6] x86/mm: Remove set_kernel_text_r[ow]() Peter Zijlstra
2019-10-08 15:07   ` [PATCH v3 0/6] Rewrite x86/ftrace to use text_poke() Steven Rostedt
2019-10-07  8:25 ` [PATCH v2 0/4] Propagate module notifier errors Peter Zijlstra
2019-10-07  8:25   ` [PATCH v2 1/4] notifier: Fix broken error handling pattern Peter Zijlstra
2019-10-10 22:01     ` Rafael J. Wysocki
2019-10-07  8:25   ` [PATCH v2 2/4] module: Fix up module_notifier return values Peter Zijlstra
2019-10-23 19:25     ` Steven Rostedt
2019-10-07  8:25   ` [PATCH v2 3/4] module: Properly propagate MODULE_STATE_COMING failure Peter Zijlstra
2019-10-08 13:08     ` Miroslav Benes
2019-10-07  8:25   ` [PATCH v2 4/4] jump_label,module: Fix module lifetime for __jump_label_mod_text_reserved Peter Zijlstra
2019-10-23 19:29     ` Steven Rostedt
2019-10-07  8:27 ` [PATCH v2 00/13] Add static_call() Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 01/13] compiler.h: Make __ADDRESSABLE() symbol truly unique Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 02/13] static_call: Add basic static call infrastructure Peter Zijlstra
2019-10-07 11:33     ` Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 03/13] static_call: Add inline " Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 04/13] static_call: Avoid kprobes on inline static_call()s Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 05/13] x86/static_call: Add out-of-line static call implementation Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 06/13] x86/static_call: Add inline static call implementation for x86-64 Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 07/13] static_call: Simple self-test Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 08/13] tracepoints: Use static_call Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 09/13] x86/alternatives: Teach text_poke_bp() to emulate RET Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 10/13] static_call: Add static_cond_call() Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 11/13] static_call: Handle tail-calls Peter Zijlstra
2019-10-07  8:27   ` [PATCH v2 12/13] static_call: Allow early init Peter Zijlstra
2019-10-07  8:27   ` [RFC][PATCH v2 13/13] x86/perf, static_call: Optimize x86_pmu methods Peter Zijlstra
2019-10-07 11:33   ` [PATCH v2 00/13] Add static_call() Peter Zijlstra
2019-10-07  8:44 ` [RFC][PATCH 0/9] Variable size jump_label support Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 1/9] jump_label, x86: Strip ASM " Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 2/9] jump_label, x86: Factor out the __jump_table generation Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 3/9] jump_label, x86: Remove init NOP optimization Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 4/9] jump_label, x86: Improve error when we fail expected text Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 5/9] jump_label, x86: Introduce jump_entry_size() Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 6/9] jump_label, x86: Add variable length patching support Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 7/9] jump_label,objtool: Validate variable size jump labels Peter Zijlstra
2019-10-07  8:44   ` [RFC][PATCH 8/9] jump_label,objtool: Generate possible statistics Peter Zijlstra
2019-10-07  8:44   ` Peter Zijlstra [this message]
2019-10-07 12:07   ` [RFC][PATCH 0/9] Variable size jump_label support Peter Zijlstra
2019-10-07 12:55     ` Ingo Molnar
2019-10-07 15:08       ` Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191007090012.28803430.0@infradead.org \
    --to=peterz@infradead.org \
    --cc=ard.biesheuvel@linaro.org \
    --cc=bristot@redhat.com \
    --cc=hjl.tools@gmail.com \
    --cc=hpa@zytor.com \
    --cc=jbaron@akamai.com \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=namit@vmware.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).