From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org
Cc: peterz@infradead.org, linux-kernel@vger.kernel.org,
rostedt@goodmis.org, mhiramat@kernel.org, bristot@redhat.com,
jbaron@akamai.com, torvalds@linux-foundation.org,
tglx@linutronix.de, mingo@kernel.org, namit@vmware.com,
hpa@zytor.com, luto@kernel.org, ard.biesheuvel@linaro.org,
jpoimboe@redhat.com, hjl.tools@gmail.com
Subject: [RFC][PATCH 9/9] jump_label, x86: Enable JMP8/NOP2 support
Date: Mon, 07 Oct 2019 10:44:52 +0200 [thread overview]
Message-ID: <20191007090012.28803430.0@infradead.org> (raw)
In-Reply-To: 20191007084443.79370128.1@infradead.org
Enable and emit short JMP/NOP jump_label entries.
A lot of the jumps are in fact short, like around tracepoints:
0000 0000000000000920 <native_read_msr>: | 0000 0000000000000920 <native_read_msr>:
0000 920: 53 push %rbx | 0000 920: 53 push %rbx
0001 921: 89 f9 mov %edi,%ecx | 0001 921: 89 f9 mov %edi,%ecx
0003 923: 0f 32 rdmsr | 0003 923: 0f 32 rdmsr
0005 925: 48 c1 e2 20 shl $0x20,%rdx | 0005 925: 48 c1 e2 20 shl $0x20,%rdx
0009 929: 48 89 d3 mov %rdx,%rbx | 0009 929: 48 89 d3 mov %rdx,%rbx
000c 92c: 48 09 c3 or %rax,%rbx | 000c 92c: 48 09 c3 or %rax,%rbx
000f 92f: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) \ 000f 92f: 66 90 xchg %ax,%ax
0014 934: 48 89 d8 mov %rbx,%rax \ 0011 931: 48 89 d8 mov %rbx,%rax
0017 937: 5b pop %rbx \ 0014 934: 5b pop %rbx
0018 938: c3 retq \ 0015 935: c3 retq
0019 939: 48 89 de mov %rbx,%rsi \ 0016 936: 48 89 de mov %rbx,%rsi
001c 93c: 31 d2 xor %edx,%edx \ 0019 939: 31 d2 xor %edx,%edx
001e 93e: e8 00 00 00 00 callq 943 <native_read_msr+0x23> \ 001b 93b: e8 00 00 00 00 callq 940 <native_read_msr+0x20>
001f 93f: R_X86_64_PLT32 do_trace_read_msr-0x4 \ 001c 93c: R_X86_64_PLT32 do_trace_read_msr-0x4
0023 943: 48 89 d8 mov %rbx,%rax \ 0020 940: 48 89 d8 mov %rbx,%rax
0026 946: 5b pop %rbx \ 0023 943: 5b pop %rbx
0027 947: c3 retq \ 0024 944: c3 retq
.rela__jump_table
000000000010 000200000002 R_X86_64_PC32 0000000000000000 .text + 92f
000000000014 000200000002 R_X86_64_PC32 0000000000000000 .text + 939 (or 936)
000000000018 014500000018 R_X86_64_PC64 0000000000000000 __tracepoint_read_msr + 8
The below patch works as long as the jump doesn't cross sections; the
moment GCC generates a branch crossing sections and feeds it into our
asm-goto things come apart like:
/tmp/ccM70dCh.s: Assembler messages:
/tmp/ccM70dCh.s: Error: invalid operands (.text.unlikely and .text sections) for `-' when setting `disp'
../arch/x86/include/asm/jump_label.h:39: Error: invalid operands (.text.unlikely and *ABS* sections) for `>>'
../arch/x86/include/asm/jump_label.h:39: Error: invalid operands (.text.unlikely and *ABS* sections) for `>>'
Which is really unfortunate since it is a completely sane thing to
happen. We really need a GAS extention to handle this :-/
All we really need is to detect the two offsets are from different
sections and punt to the 5 byte nop. But AFAICT there is nothing that
can do that.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: "H.J. Lu" <hjl.tools@gmail.com>
---
arch/x86/Kconfig | 10 ++++++++++
arch/x86/include/asm/jump_label.h | 36 +++++++++++++++++++++++++++++++++++-
arch/x86/kernel/jump_label.c | 17 +++++++++++++++++
3 files changed, 62 insertions(+), 1 deletion(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -230,6 +230,16 @@ config X86
select X86_FEATURE_NAMES if PROC_FS
select PROC_PID_ARCH_STATUS if PROC_FS
+#
+# This mostly depends on the asm ".nops 5" directive existing and emitting a
+# single instruction nop, this is true for x86_64, but not for i386, which
+# violates the single instruction constraint.
+#
+config CC_HAS_ASM_NOPS
+ def_bool y
+ depends on X86_64
+ depends on $(success,echo 'void foo(void) { asm inline (".nops 5"); }' | $(CC) -x c - -c -o /dev/null)
+
config INSTRUCTION_DECODER
def_bool y
depends on KPROBES || PERF_EVENTS || UPROBES
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,6 +4,10 @@
#define HAVE_JUMP_LABEL_BATCH
+#ifdef CONFIG_CC_HAS_ASM_NOPS
+#define HAVE_JUMP_LABEL_VARIABLE
+#endif
+
#ifdef CONFIG_X86_64
# define STATIC_KEY_NOP2 P6_NOP2
# define STATIC_KEY_NOP5 P6_NOP5_ATOMIC
@@ -31,7 +35,29 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+ /*
+ * This comes apart mightily when %[l_yes] and 1b are in
+ * different sections; like for instance .text and
+ * .text.unlikely. Sadly there is nothing to actually detect
+ * and handle this case explicitly.
+ *
+ * GAS sucks!!
+ */
+ ".set disp, (%l[l_yes]) - (1b + 2) \n\t"
+ ".set res, (disp >> 31) == (disp >> 7) \n\t"
+ ".set is_byte, -res \n\t"
+ ".set is_long, -(~res) \n\t"
+
+ /*
+ * This relies on .nops:
+ * - matching the above STATIC_KEY_NOP* bytes
+ * - emitting a single instruction nop for 2 and 5 bytes.
+ */
+ ".nops (2*is_byte) + (5*is_long)\n\t"
+#else
".byte " __stringify(STATIC_KEY_NOP5) "\n\t"
+#endif
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
@@ -43,8 +69,13 @@ static __always_inline bool arch_static_
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+ "jmp %l[l_yes] \n\t"
+#else
+ /* Equivalent to "jmp.d32 \target" */
".byte 0xe9 \n\t"
".long %l[l_yes] - (. + 4) \n\t"
+#endif
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
@@ -59,9 +90,12 @@ extern int arch_jump_entry_size(struct j
.macro STATIC_BRANCH_FALSE_LIKELY target, key
.Lstatic_jump_\@:
- /* Equivalent to "jmp.d32 \target" */
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+ jmp \target
+#else
.byte 0xe9
.long \target - (. + 4)
+#endif
.pushsection __jump_table, "aw"
_ASM_ALIGN
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -18,7 +18,24 @@
int arch_jump_entry_size(struct jump_entry *entry)
{
+#ifdef HAVE_JUMP_LABEL_VARIABLE
+ struct insn insn;
+
+ /*
+ * Because the instruction size heuristic doesn't purely rely on
+ * displacement, but also on section, and we're hindered by GNU as UB
+ * to emit the assemble time choice, we have to discover the size at
+ * runtime.
+ */
+ kernel_insn_init(&insn, (void *)jump_entry_code(entry), MAX_INSN_SIZE);
+ insn_get_length(&insn);
+ BUG_ON(!insn_complete(&insn));
+ BUG_ON(insn.length != 2 && insn.length != 5);
+
+ return insn.length;
+#else
return JMP32_INSN_SIZE;
+#endif
}
struct jump_label_patch {
next prev parent reply other threads:[~2019-10-07 11:24 UTC|newest]
Thread overview: 128+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-07 9:02 [RESEND] everything text-poke: ftrace, modules, static_call and jump_label Peter Zijlstra
2019-10-07 8:17 ` [PATCH v3 0/6] Rewrite x86/ftrace to use text_poke() Peter Zijlstra
2019-10-07 8:17 ` [PATCH v3 1/6] x86/alternatives: Teach text_poke_bp() to emulate instructions Peter Zijlstra
2019-10-08 14:29 ` Borislav Petkov
2019-10-08 14:40 ` Steven Rostedt
2019-10-08 14:50 ` Borislav Petkov
2019-10-08 14:48 ` Peter Zijlstra
2019-10-08 14:54 ` Borislav Petkov
2019-10-08 15:04 ` Steven Rostedt
2019-10-08 15:24 ` Borislav Petkov
2019-10-09 12:03 ` Daniel Bristot de Oliveira
2019-10-07 8:17 ` [PATCH v3 2/6] x86/alternatives: Update int3_emulate_push() comment Peter Zijlstra
2019-10-07 8:17 ` [PATCH v3 3/6] x86/alternatives,jump_label: Provide better text_poke() batching interface Peter Zijlstra
2019-10-09 12:04 ` Daniel Bristot de Oliveira
2019-10-07 8:17 ` [PATCH v3 4/6] x86/alternatives: Add and use text_gen_insn() helper Peter Zijlstra
2019-10-08 6:23 ` Masami Hiramatsu
2019-10-08 8:15 ` Peter Zijlstra
2019-10-07 8:17 ` [PATCH v3 5/6] x86/ftrace: Use text_poke() Peter Zijlstra
2019-10-08 14:43 ` Steven Rostedt
2019-10-08 17:11 ` Peter Zijlstra
2019-10-08 17:27 ` Steven Rostedt
2019-10-10 2:41 ` Steven Rostedt
2019-10-10 9:20 ` Peter Zijlstra
2019-10-10 13:19 ` Steven Rostedt
2019-10-10 14:05 ` Peter Zijlstra
2019-10-10 15:54 ` Steven Rostedt
2019-10-10 17:28 ` Peter Zijlstra
2019-10-10 17:48 ` Steven Rostedt
2019-10-11 10:45 ` Peter Zijlstra
2019-10-11 10:47 ` Peter Zijlstra
2019-10-11 10:50 ` Peter Zijlstra
2019-10-11 12:59 ` Peter Zijlstra
2019-10-11 13:33 ` Steven Rostedt
2019-10-11 13:45 ` Peter Zijlstra
2019-10-15 13:07 ` Jessica Yu
2019-10-15 13:56 ` Peter Zijlstra
2019-10-15 14:11 ` Peter Zijlstra
2019-10-15 14:13 ` Miroslav Benes
2019-10-15 15:06 ` Joe Lawrence
2019-10-15 15:31 ` Jessica Yu
2019-10-15 22:17 ` Joe Lawrence
2019-10-15 22:27 ` Steven Rostedt
2019-10-16 7:42 ` Peter Zijlstra
2019-10-16 10:15 ` Miroslav Benes
2019-10-21 15:05 ` Josh Poimboeuf
2020-01-20 16:50 ` Josh Poimboeuf
2020-01-21 8:35 ` Miroslav Benes
2020-01-21 16:10 ` Josh Poimboeuf
2020-01-22 10:09 ` Miroslav Benes
2020-01-22 21:42 ` Josh Poimboeuf
2020-01-28 9:28 ` Miroslav Benes
2020-01-28 15:00 ` Josh Poimboeuf
2020-01-28 15:40 ` Petr Mladek
2020-01-28 17:02 ` Josh Poimboeuf
2020-01-29 0:46 ` Jiri Kosina
2020-01-29 2:17 ` Josh Poimboeuf
2020-01-29 3:14 ` Jiri Kosina
2020-01-29 12:28 ` Miroslav Benes
2020-01-29 15:59 ` Josh Poimboeuf
2020-01-30 9:53 ` Petr Mladek
2020-01-30 14:17 ` Josh Poimboeuf
2020-01-31 7:17 ` Petr Mladek
2020-01-22 12:15 ` Miroslav Benes
2020-01-22 15:05 ` Miroslav Benes
2020-01-22 22:03 ` Josh Poimboeuf
2020-01-23 10:19 ` Martin Jambor
2019-10-16 7:49 ` Peter Zijlstra
2019-10-16 10:20 ` Miroslav Benes
2019-10-16 13:29 ` Miroslav Benes
2019-10-18 13:03 ` Jessica Yu
2019-10-18 13:40 ` Petr Mladek
2019-10-21 14:14 ` Jessica Yu
2019-10-21 15:31 ` Josh Poimboeuf
2019-10-22 8:27 ` Miroslav Benes
2019-10-22 14:31 ` Josh Poimboeuf
2019-10-23 9:04 ` Miroslav Benes
2019-10-16 6:51 ` Miroslav Benes
2019-10-16 9:23 ` Peter Zijlstra
2019-10-16 9:36 ` Jessica Yu
2019-10-16 9:51 ` Peter Zijlstra
2019-10-16 12:39 ` Peter Zijlstra
2019-10-22 8:45 ` Miroslav Benes
2019-10-15 14:42 ` Peter Zijlstra
2019-10-15 18:31 ` Peter Zijlstra
2019-10-15 15:51 ` Jessica Yu
2019-10-15 13:28 ` Steven Rostedt
2019-10-15 13:42 ` Peter Zijlstra
2019-10-15 16:09 ` Jessica Yu
2019-10-07 8:17 ` [PATCH v3 6/6] x86/mm: Remove set_kernel_text_r[ow]() Peter Zijlstra
2019-10-08 15:07 ` [PATCH v3 0/6] Rewrite x86/ftrace to use text_poke() Steven Rostedt
2019-10-07 8:25 ` [PATCH v2 0/4] Propagate module notifier errors Peter Zijlstra
2019-10-07 8:25 ` [PATCH v2 1/4] notifier: Fix broken error handling pattern Peter Zijlstra
2019-10-10 22:01 ` Rafael J. Wysocki
2019-10-07 8:25 ` [PATCH v2 2/4] module: Fix up module_notifier return values Peter Zijlstra
2019-10-23 19:25 ` Steven Rostedt
2019-10-07 8:25 ` [PATCH v2 3/4] module: Properly propagate MODULE_STATE_COMING failure Peter Zijlstra
2019-10-08 13:08 ` Miroslav Benes
2019-10-07 8:25 ` [PATCH v2 4/4] jump_label,module: Fix module lifetime for __jump_label_mod_text_reserved Peter Zijlstra
2019-10-23 19:29 ` Steven Rostedt
2019-10-07 8:27 ` [PATCH v2 00/13] Add static_call() Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 01/13] compiler.h: Make __ADDRESSABLE() symbol truly unique Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 02/13] static_call: Add basic static call infrastructure Peter Zijlstra
2019-10-07 11:33 ` Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 03/13] static_call: Add inline " Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 04/13] static_call: Avoid kprobes on inline static_call()s Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 05/13] x86/static_call: Add out-of-line static call implementation Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 06/13] x86/static_call: Add inline static call implementation for x86-64 Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 07/13] static_call: Simple self-test Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 08/13] tracepoints: Use static_call Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 09/13] x86/alternatives: Teach text_poke_bp() to emulate RET Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 10/13] static_call: Add static_cond_call() Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 11/13] static_call: Handle tail-calls Peter Zijlstra
2019-10-07 8:27 ` [PATCH v2 12/13] static_call: Allow early init Peter Zijlstra
2019-10-07 8:27 ` [RFC][PATCH v2 13/13] x86/perf, static_call: Optimize x86_pmu methods Peter Zijlstra
2019-10-07 11:33 ` [PATCH v2 00/13] Add static_call() Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 0/9] Variable size jump_label support Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 1/9] jump_label, x86: Strip ASM " Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 2/9] jump_label, x86: Factor out the __jump_table generation Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 3/9] jump_label, x86: Remove init NOP optimization Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 4/9] jump_label, x86: Improve error when we fail expected text Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 5/9] jump_label, x86: Introduce jump_entry_size() Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 6/9] jump_label, x86: Add variable length patching support Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 7/9] jump_label,objtool: Validate variable size jump labels Peter Zijlstra
2019-10-07 8:44 ` [RFC][PATCH 8/9] jump_label,objtool: Generate possible statistics Peter Zijlstra
2019-10-07 8:44 ` Peter Zijlstra [this message]
2019-10-07 12:07 ` [RFC][PATCH 0/9] Variable size jump_label support Peter Zijlstra
2019-10-07 12:55 ` Ingo Molnar
2019-10-07 15:08 ` Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191007090012.28803430.0@infradead.org \
--to=peterz@infradead.org \
--cc=ard.biesheuvel@linaro.org \
--cc=bristot@redhat.com \
--cc=hjl.tools@gmail.com \
--cc=hpa@zytor.com \
--cc=jbaron@akamai.com \
--cc=jpoimboe@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mhiramat@kernel.org \
--cc=mingo@kernel.org \
--cc=namit@vmware.com \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.