linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org, jpoimboe@redhat.com, jgross@suse.com, mbenes@suse.com
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org
Subject: [PATCH v2 03/14] x86/retpoline: Simplify retpolines
Date: Thu, 18 Mar 2021 18:11:06 +0100	[thread overview]
Message-ID: <20210318171919.580212227@infradead.org> (raw)
In-Reply-To: 20210318171103.577093939@infradead.org

Due to commit c9c324dc22aa ("objtool: Support stack layout changes
in alternatives"), it is possible to simplify the retpolines.

Currently our retpolines consist of 2 symbols,
__x86_indirect_thunk_\reg, which is the compiler target, and
__x86_retpoline_\reg, which is the actual retpoline. Both are
consecutive in code and aligned such that for any one register they
both live in the same cacheline:

  0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0                   jmpq   *%rax
   2:   90                      nop
   3:   90                      nop
   4:   90                      nop

  0000000000000005 <__x86_retpoline_rax>:
   5:   e8 07 00 00 00          callq  11 <__x86_retpoline_rax+0xc>
   a:   f3 90                   pause
   c:   0f ae e8                lfence
   f:   eb f9                   jmp    a <__x86_retpoline_rax+0x5>
  11:   48 89 04 24             mov    %rax,(%rsp)
  15:   c3                      retq
  16:   66 2e 0f 1f 84 00 00 00 00 00   nopw   %cs:0x0(%rax,%rax,1)

The thunk is an alternative_2, where one option is a jmp to the
retpoline. This was done so that objtool didn't need to deal with
alternatives with stack ops. But that problem has been solved, so now
it is possible to fold the entire retpoline into the alternative to
simplify and consolidate unused bytes:

  0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0                   jmpq   *%rax
   2:   90                      nop
   3:   90                      nop
   4:   90                      nop
   5:   90                      nop
   6:   90                      nop
   7:   90                      nop
   8:   90                      nop
   9:   90                      nop
   a:   90                      nop
   b:   90                      nop
   c:   90                      nop
   d:   90                      nop
   e:   90                      nop
   f:   90                      nop
  10:   90                      nop
  11:   66 66 2e 0f 1f 84 00 00 00 00 00        data16 nopw %cs:0x0(%rax,%rax,1)
  1c:   0f 1f 40 00             nopl   0x0(%rax)

Notice that since the longest alternative sequence is now:

   0:   e8 07 00 00 00          callq  c <.altinstr_replacement+0xc>
   5:   f3 90                   pause
   7:   0f ae e8                lfence
   a:   eb f9                   jmp    5 <.altinstr_replacement+0x5>
   c:   48 89 04 24             mov    %rax,(%rsp)
  10:   c3                      retq

17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. (IOW,
if we can shrink the retpoline by 1 byte we can pack it more dense)

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/asm-prototypes.h |    7 -------
 arch/x86/include/asm/nospec-branch.h  |    6 +++---
 arch/x86/lib/retpoline.S              |   34 +++++++++++++++++-----------------
 tools/objtool/check.c                 |    3 +--
 4 files changed, 21 insertions(+), 29 deletions(-)

--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void);
 #define DECL_INDIRECT_THUNK(reg) \
 	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
 
-#define DECL_RETPOLINE(reg) \
-	extern asmlinkage void __x86_retpoline_ ## reg (void);
-
 #undef GEN
 #define GEN(reg) DECL_INDIRECT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
-#undef GEN
-#define GEN(reg) DECL_RETPOLINE(reg)
-#include <asm/GEN-for-each-reg.h>
-
 #endif /* CONFIG_RETPOLINE */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -81,7 +81,7 @@
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
-		      __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
 		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	jmp	*%\reg
@@ -91,7 +91,7 @@
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
-		      __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
 		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	call	*%\reg
@@ -129,7 +129,7 @@
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	"call __x86_retpoline_%V[thunk_target]\n",		\
+	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
 	X86_FEATURE_RETPOLINE,					\
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,27 +10,31 @@
 #include <asm/unwind_hints.h>
 #include <asm/frame.h>
 
-.macro THUNK reg
-	.section .text.__x86.indirect_thunk
-
-	.align 32
-SYM_FUNC_START(__x86_indirect_thunk_\reg)
-	JMP_NOSPEC \reg
-SYM_FUNC_END(__x86_indirect_thunk_\reg)
-
-SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+.macro RETPOLINE reg
 	ANNOTATE_INTRA_FUNCTION_CALL
-	call	.Ldo_rop_\@
+	call    .Ldo_rop_\@
 .Lspec_trap_\@:
 	UNWIND_HINT_EMPTY
 	pause
 	lfence
-	jmp	.Lspec_trap_\@
+	jmp .Lspec_trap_\@
 .Ldo_rop_\@:
-	mov	%\reg, (%_ASM_SP)
+	mov     %\reg, (%_ASM_SP)
 	UNWIND_HINT_FUNC
 	ret
-SYM_FUNC_END(__x86_retpoline_\reg)
+.endm
+
+.macro THUNK reg
+	.section .text.__x86.indirect_thunk
+
+	.align 32
+SYM_FUNC_START(__x86_indirect_thunk_\reg)
+
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+		      __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+
+SYM_FUNC_END(__x86_indirect_thunk_\reg)
 
 .endm
 
@@ -48,7 +52,6 @@ SYM_FUNC_END(__x86_retpoline_\reg)
 
 #define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
 #define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define EXPORT_RETPOLINE(reg)  __EXPORT_THUNK(__x86_retpoline_ ## reg)
 
 #undef GEN
 #define GEN(reg) THUNK reg
@@ -58,6 +61,3 @@ SYM_FUNC_END(__x86_retpoline_\reg)
 #define GEN(reg) EXPORT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
-#undef GEN
-#define GEN(reg) EXPORT_RETPOLINE(reg)
-#include <asm/GEN-for-each-reg.h>
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -800,8 +800,7 @@ static int add_jump_destinations(struct
 		} else if (reloc->sym->type == STT_SECTION) {
 			dest_sec = reloc->sym->sec;
 			dest_off = arch_dest_reloc_offset(reloc->addend);
-		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
-			   !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
+		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
 			/*
 			 * Retpoline jumps are really dynamic jumps in
 			 * disguise, so convert them accordingly.



  parent reply	other threads:[~2021-03-18 17:24 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-18 17:11 [PATCH v2 00/14] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 01/14] x86: Add insn_decode_kernel() Peter Zijlstra
2021-03-19 10:40   ` Borislav Petkov
2021-03-18 17:11 ` [PATCH v2 02/14] x86/alternatives: Optimize optimize_nops() Peter Zijlstra
2021-03-21 12:06   ` Borislav Petkov
2021-03-22  8:17     ` Peter Zijlstra
2021-03-22 11:07       ` Borislav Petkov
2021-03-18 17:11 ` Peter Zijlstra [this message]
2021-03-19 17:18   ` [PATCH v2 03/14] x86/retpoline: Simplify retpolines David Laight
2021-03-22  9:32     ` Peter Zijlstra
2021-03-22 15:41       ` David Laight
2021-03-18 17:11 ` [PATCH v2 04/14] objtool: Correctly handle retpoline thunk calls Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 05/14] objtool: Per arch retpoline naming Peter Zijlstra
2021-03-19  2:38   ` Josh Poimboeuf
2021-03-19  9:07     ` Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 06/14] objtool: Fix static_call list generation Peter Zijlstra
2021-03-22 12:44   ` Miroslav Benes
2021-03-18 17:11 ` [PATCH v2 07/14] objtool: Rework rebuild_reloc logic Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 08/14] objtool: Add elf_create_reloc() helper Peter Zijlstra
2021-03-19  1:42   ` Josh Poimboeuf
2021-03-19  9:47     ` Peter Zijlstra
2021-03-19 15:12       ` Josh Poimboeuf
2021-03-19 15:24         ` Peter Zijlstra
2021-03-19 15:37           ` Josh Poimboeuf
2021-03-18 17:11 ` [PATCH v2 09/14] objtool: Extract elf_strtab_concat() Peter Zijlstra
2021-03-19  2:10   ` Josh Poimboeuf
2021-03-19  9:52     ` Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 10/14] objtool: Extract elf_symbol_add() Peter Zijlstra
2021-03-19  2:14   ` Josh Poimboeuf
2021-03-19  9:54     ` Peter Zijlstra
2021-03-19 15:04       ` Josh Poimboeuf
2021-03-18 17:11 ` [PATCH v2 11/14] objtool: Add elf_create_undef_symbol() Peter Zijlstra
2021-03-19  2:29   ` Josh Poimboeuf
2021-03-19  7:56     ` Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 12/14] objtool: Allow archs to rewrite retpolines Peter Zijlstra
2021-03-19  2:54   ` Josh Poimboeuf
2021-03-19 11:21     ` Peter Zijlstra
2021-03-19 13:28       ` Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 13/14] objtool: Skip magical retpoline .altinstr_replacement Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 14/14] objtool,x86: Rewrite retpoline thunk calls Peter Zijlstra
2021-03-19  3:29   ` Josh Poimboeuf
2021-03-19  8:06     ` Peter Zijlstra
2021-03-19 15:30       ` Josh Poimboeuf
2021-03-19 15:56         ` Peter Zijlstra
2021-03-19 22:52           ` Josh Poimboeuf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210318171919.580212227@infradead.org \
    --to=peterz@infradead.org \
    --cc=jgross@suse.com \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mbenes@suse.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).