From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Google-Smtp-Source: AH8x227fU44Vp8dRUuUPjIBCdFEpgjoTFUXkBo17mLxdfGoRaY3/aFx2FK2leHi+uahMwzl/A+qo ARC-Seal: i=1; a=rsa-sha256; t=1517070289; cv=none; d=google.com; s=arc-20160816; b=Cm/A7st7OM+Bio2YAifuV3JTJX1DMXLddlUQEry3aQ/CGRXB/UO8D3x0ySkQ626mYC Dd0T3YuMr2ixXvhmSC1uT1UdqsPg+IcDyn/Jwr3Mvoccy8ZCFmPaPVeZMbmc14wt9dE+ ES8fOmgTXMgmnLTLptHAhw8qdRqESjwLo3hZiTuzFHgmbwQYbnwntSzf+6N7MyyXO0ia YFfMRGrQGP+qIp3IrbXtE+IxAFG10LAeMB1YoFatmaGPz+F5alwZhJbsdfq4s9v6RrsH VRTzeoVjQk8/4tqxJec974ws99yRPy2UKb8mkpzrs8l32YYhEKfCRc/4fB8zXNEmRVG+ UorA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:to:from:dkim-signature :arc-authentication-results; bh=RM7OqXIgglOkA742Lp9jNgh9KTpdmRYEe3WpF261jOk=; b=HCDdcHOry20FmmKD3TBfJ7roc60dgMm6c2HqOGap4+VuuvND2BOAcOHKxSyPX0qYXq l8qMEnEqU1W4bveGrqOv+p6BIC4BYYI9IqM3UYEw9sOTIidMb7fG4sith10EUJ1C4udn EMGw9vPeGDYxB5MfIkD2RFuLvhbV3baSKTEdMUAq3+sc/xbAwB7GnNPvGyOgmIbwU99B qTQ5EFZ/RToaygUo2LPrg4YKq9OGO1nzE4J+1JeVYGsU2uAJrLifHMB3F6Q3gsLf71lN GM6NMvEF5PtsjPhEHiR/7dCnyN8JLEizrDi1sUvYOyzdXZUd3nIyt9gb6DbfVkxBbNJh 5zJA== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@amazon.co.uk header.s=amazon201209 header.b=dj2OJ+c1; spf=pass (google.com: domain of prvs=558ede6c8=dwmw@amazon.com designates 52.95.49.90 as permitted sender) smtp.mailfrom=prvs=558ede6c8=dwmw@amazon.com; dmarc=pass (p=QUARANTINE sp=QUARANTINE dis=NONE) header.from=amazon.co.uk Authentication-Results: mx.google.com; dkim=pass header.i=@amazon.co.uk header.s=amazon201209 header.b=dj2OJ+c1; spf=pass (google.com: domain of prvs=558ede6c8=dwmw@amazon.com designates 52.95.49.90 as permitted sender) smtp.mailfrom=prvs=558ede6c8=dwmw@amazon.com; dmarc=pass (p=QUARANTINE sp=QUARANTINE dis=NONE) header.from=amazon.co.uk X-IronPort-AV: E=Sophos;i="5.46,422,1511827200"; d="scan'208";a="329399996" From: David Woodhouse To: arjan@linux.intel.com, tglx@linutronix.de, karahmed@amazon.de, x86@kernel.org, linux-kernel@vger.kernel.org, tim.c.chen@linux.intel.com, bp@alien8.de, peterz@infradead.org, pbonzini@redhat.com, ak@linux.intel.com, torvalds@linux-foundation.org, gregkh@linux-foundation.org, dave.hansen@intel.com Subject: [PATCH 2/3] x86/retpoline: Simplify vmexit_fill_RSB() Date: Sat, 27 Jan 2018 16:24:33 +0000 Message-Id: <1517070274-12128-3-git-send-email-dwmw@amazon.co.uk> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1517070274-12128-1-git-send-email-dwmw@amazon.co.uk> References: <1517070274-12128-1-git-send-email-dwmw@amazon.co.uk> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-getmail-retrieved-from-mailbox: INBOX X-GMAIL-THRID: =?utf-8?q?1590763496213350138?= X-GMAIL-MSGID: =?utf-8?q?1590763496213350138?= X-Mailing-List: linux-kernel@vger.kernel.org List-ID: From: Borislav Petkov Simplify it to call an asm-function instead of pasting 41 insn bytes at every call site. Also, add alignment to the macro as suggested here: https://support.google.com/faqs/answer/7625886 [dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler] Signed-off-by: Borislav Petkov Signed-off-by: David Woodhouse --- arch/x86/entry/entry_32.S | 3 +- arch/x86/entry/entry_64.S | 3 +- arch/x86/include/asm/asm-prototypes.h | 3 ++ arch/x86/include/asm/nospec-branch.h | 70 ++++------------------------------- arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 56 ++++++++++++++++++++++++++++ 6 files changed, 71 insertions(+), 65 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 60c4c34..2a35b1e 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,7 +252,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %ebx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 63f4320..b4f0098 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -495,7 +495,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %rbx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 1908214..4d11161 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,4 +38,7 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) +asmlinkage void __fill_rsb(void); +asmlinkage void __clear_rsb(void); + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 19ecb54..df4ececa 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,50 +7,6 @@ #include #include -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * We define a CPP macro such that it can be used from both .S files and - * inline assembly. It's possible to do a .macro and then include that - * from C via asm(".include ") but let's not go there. - */ - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -/* - * Google experimented with loop-unrolling and this turned out to be - * the optimal version — two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - call 772f; \ -773: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - call 774f; \ -775: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; - #ifdef __ASSEMBLY__ /* @@ -121,17 +77,10 @@ #endif .endm - /* - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP - * monstrosity above, manually. - */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +/* This clobbers the BX register */ +.macro FILL_RETURN_BUFFER nr:req ftr:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr -.Lskip_rsb_\@: + ALTERNATIVE "", "call __clear_rsb", \ftr #endif .endm @@ -206,15 +155,10 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); + alternative_input("", + "call __fill_rsb", + X86_FEATURE_RETPOLINE, + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); #endif } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index d435c89..d0a3170 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_RETPOLINE) += retpoline.o +OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index c909961..3040848 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,6 +7,7 @@ #include #include #include +#include .macro THUNK reg .section .text.__x86.indirect_thunk @@ -46,3 +47,58 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +.macro STUFF_RSB nr:req sp:req + mov $(\nr / 2), %_ASM_BX + .align 16 +771: + call 772f +773: /* speculation trap */ + pause + lfence + jmp 773b + .align 16 +772: + call 774f +775: /* speculation trap */ + pause + lfence + jmp 775b + .align 16 +774: + dec %_ASM_BX + jnz 771b + add $((BITS_PER_LONG/8) * \nr), \sp +.endm + +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +ENTRY(__fill_rsb) + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP + ret +END(__fill_rsb) +EXPORT_SYMBOL_GPL(__fill_rsb) + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + +ENTRY(__clear_rsb) + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP + ret +END(__clear_rsb) +EXPORT_SYMBOL_GPL(__clear_rsb) -- 2.7.4