On Fri, 2018-01-05 at 09:28 -0800, Linus Torvalds wrote: > > Yes, I would suggest against expecting altinstructions to have > relocation information. They are generated in a different place, so.. > > That said, I honestly like the inline version (the one that is in the > google paper first) of the retpoline more than the out-of-line one. > And that one shouldn't have any relocation issues, because all the > offsets are relative. Note that the *only* issue with the relocation is that it pushes me to use X86_FEATURE_NO_RETPOLINE for my feature instead of X86_FEATURE_RETPOLINE as might be natural. And actually there's a motivation to do that anyway, because of the way three-way alternatives interact. With the existing negative flag I can do   ALTERNATIVE_2(retpoline, K8: lfence+jmp; NO_RETPOLINE: jmp) But if I invert it, I think I need two feature flags to get the same functionality — X86_FEATURE_RETPOLINE and X86_FEATURE_RETPOLINE_AMD:  ALTERNATIVE_2(jmp, RETPOLINE: retpoline, RETPOLINE_AMD: lfence+jmp) So I was completely prepared to live with the slightly unnatural inverse logic of the feature flag. But since you asked... > We want to use that one for the entry stub anyway, can't we just > standardize on that one for all our assembly? > > If the *compiler* uses the out-of-line version, that's a separate > thing. But for our asm cases, let's just make it all be the inline > case, ok? OK.... it starts off looking a bit like this. You're right; with the caveats above it will let me invert the logic to X86_FEATURE_RETPOLINE because the alternatives mechanism no longer needs to adjust any part of the retpoline code path when it's in 'altinstr'. And it does let me use a simple NOSPEC_JMP in the entry trampoline instead of open-coding it again, which is nice. But the first pass of it, below, is fugly as hell. 
I'll take another look at *using* the ALTERNATIVE_2 macro instead of reimplementing it for NOSPEC_CALL, but I strongly suspect that's just going to land me with a fairly unreadable __stringify(jmp;call;lfence;jmp;call;mov;ret) monstrosity all on a single line. Assembler macros are... brittle. diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 76f94bbacaec..8f7e1129f493 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -188,17 +188,7 @@ ENTRY(entry_SYSCALL_64_trampoline)    */   pushq %rdi   movq $entry_SYSCALL_64_stage2, %rdi - /* -  * Open-code the retpoline from retpoline.S, because we can't -  * just jump to it directly. -  */ - ALTERNATIVE "call 2f", "jmp *%rdi", X86_FEATURE_NO_RETPOLINE -1: - lfence - jmp 1b -2: - mov %rdi, (%rsp) - ret + NOSPEC_JMP rdi  END(entry_SYSCALL_64_trampoline)     .popsection diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index eced0dfaddc9..1c8312ff186a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -14,15 +14,54 @@   */  .macro NOSPEC_JMP reg:req  #ifdef CONFIG_RETPOLINE - ALTERNATIVE __stringify(jmp __x86.indirect_thunk.\reg), __stringify(jmp *%\reg), X86_FEATURE_NO_RETPOLINE + ALTERNATIVE_2 "call 1112f", __stringify(lfence;jmp *%\reg), X86_FEATURE_K8, __stringify(jmp *%\reg), X86_FEATURE_NO_RETPOLINE +1111: + lfence + jmp 1111b +1112: + mov %\reg, (%_ASM_SP) + ret  #else - jmp *%\reg + jmp *%\reg  #endif  .endm   +/* + * Even __stringify() on the arguments doesn't really make it nice to use + * the existing ALTERNATIVE_2 macro here. So open-code our own version... 
+ */  .macro NOSPEC_CALL reg:req  #ifdef CONFIG_RETPOLINE +140: + jmp 1113f +1110: + call 1112f +1111: + lfence + jmp 1111b +1112: + mov %\reg, (%_ASM_SP) + ret +1113: + call 1110b +141: + .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,X86_FEATURE_K8,142b-140b,144f-143f,142b-141b + altinstruction_entry 140b,144f,X86_FEATURE_NO_RETPOLINE,142b-140b,145f-144f,142b-141b + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + lfence + call *%\reg +144: + call *%\reg +145: + .popsection  #else   call *%\reg  #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 2a4b1f09eb84..5c15e4307da5 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -6,19 +6,14 @@  #include <asm/cpufeatures.h>  #include <asm/alternative-asm.h>  #include <asm/export.h> +#include <asm/nospec-branch.h>    .macro THUNK sp reg   .section .text.__x86.indirect_thunk.\reg    ENTRY(__x86.indirect_thunk.\reg)   CFI_STARTPROC - ALTERNATIVE_2 "call 2f", __stringify(lfence;jmp *%\reg), X86_FEATURE_K8, __stringify(jmp *%\reg), X86_FEATURE_NO_RETPOLINE -1: - lfence - jmp 1b -2: - mov %\reg, (%\sp) - ret + NOSPEC_JMP \reg   CFI_ENDPROC  ENDPROC(__x86.indirect_thunk.\reg)  EXPORT_SYMBOL(__x86.indirect_thunk.\reg)