From: Nadav Amit <namit@vmware.com>
To: Ingo Molnar <mingo@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	"H . Peter Anvin " <hpa@zytor.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	<linux-kernel@vger.kernel.org>, Nadav Amit <nadav.amit@gmail.com>,
	<x86@kernel.org>, Borislav Petkov <bp@alien8.de>,
	David Woodhouse <dwmw@amazon.co.uk>,
	Nadav Amit <namit@vmware.com>
Subject: [RFC PATCH 2/5] x86: patch indirect branch promotion
Date: Wed, 17 Oct 2018 17:54:17 -0700	[thread overview]
Message-ID: <20181018005420.82993-3-namit@vmware.com> (raw)
In-Reply-To: <20181018005420.82993-1-namit@vmware.com>

To perform indirect branch promotion, we need to find all the locations
of indirect branches. Retpolines make it relatively easy to find these
branches, by looking at the assembly and finding calls to the indirect
thunks.
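
For example, with retpolines an indirect call through %rax compiles into
a direct call to the matching thunk; this is the pattern that the CALL
macro below keys on:

	call	__x86_indirect_thunk_rax	# was: call *%rax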

An assembly macro named CALL is used to intercept all assembly calls,
find those that use the indirect thunks, and patch them to hold the code
that is needed for indirect branch promotion, as sketched below.
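
As a rough sketch (ignoring the exact byte encoding and the
preempt_disable_prefix bytes introduced in patch 1/5), the call site that
relpoline_call emits for a call through %rax behaves like:

	cmpq	$0, %rax		   # imm32 is patched with the learned target
	jnz	4f			   # patched to a jmp while no target is learned
	call	__x86_indirect_thunk_rax   # later retargeted to a direct call
	jmp	5f
4:	call	save_relpoline_rax	   # slow path: record a (src, dst) sample
5: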

The build system is slightly broken with this patch: changes to
nospec-branch.h should trigger a full kernel rebuild, but currently they
do not.

Signed-off-by: Nadav Amit <namit@vmware.com>
---
 arch/x86/include/asm/nospec-branch.h | 119 +++++++++++++++++++++++++++
 arch/x86/kernel/Makefile             |   1 +
 arch/x86/kernel/asm-offsets.c        |   6 ++
 arch/x86/kernel/macros.S             |   1 +
 arch/x86/kernel/nospec-branch.c      |   5 ++
 arch/x86/kernel/vmlinux.lds.S        |   7 ++
 arch/x86/lib/retpoline.S             |  75 +++++++++++++++++
 7 files changed, 214 insertions(+)
 create mode 100644 arch/x86/kernel/nospec-branch.c

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 0267611eb247..bd2d3a41e88c 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -7,6 +7,27 @@
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
+#include <asm/percpu.h>
+
+/*
+ * Defining registers with the architectural order
+ */
+#define ARCH_RAX	0
+#define	ARCH_RCX	1
+#define ARCH_RDX	2
+#define ARCH_RBX	3
+#define ARCH_RSP	4
+#define ARCH_RBP	5
+#define ARCH_RSI	6
+#define ARCH_RDI	7
+#define ARCH_R8		8
+#define ARCH_R9		9
+#define ARCH_R10	10
+#define ARCH_R11	11
+#define ARCH_R12	12
+#define ARCH_R13	13
+#define ARCH_R14	14
+#define ARCH_R15	15
 
 /*
  * Fill the CPU return stack buffer.
@@ -28,6 +49,9 @@
 #define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
 #define RSB_FILL_LOOPS		16	/* To avoid underflow */
 
+#define RELPOLINE_SAMPLES_NUM		(1 << 8)
+#define RELPOLINE_SAMPLES_MASK		(RELPOLINE_SAMPLES_NUM - 1)
+
 /*
  * Google experimented with loop-unrolling and this turned out to be
  * the optimal version — two calls, each with their own speculation
@@ -160,6 +184,81 @@
 #endif
 .endm
 
+/*
+ * This macro performs the actual relpoline work. The machine code is
+ * hand-coded to avoid assembler optimizations. This code is heavily
+ * patched at runtime to make it do what it should.
+ */
+.macro relpoline_call reg:req
+	# cmp instruction
+	get_reg_num reg=\reg
+.if reg_num == ARCH_RAX
+	.byte 0x48
+	.byte 0x3d
+.else
+.if reg_num >= ARCH_R8
+	.byte 0x49
+.else
+	.byte 0x48
+.endif
+	.byte 0x81
+	.byte 0xf8 | (reg_num & 7)		# modrm
+.endif
+1:
+	.long 0
+
+	.section .relpolines,"a"
+	_ASM_PTR	1b
+	.byte		reg_num
+	.previous
+
+	# cachepoline-using code
+
+	# jnz 4f, patched to jmp while the target is changed
+	preempt_disable_prefix
+	.byte	0x75, 4f - 2f
+2:
+	# call retpoline
+	preempt_disable_prefix
+	.byte 0xe8
+	.long __x86_indirect_thunk_\reg - 3f
+3:
+	# jmp 5f
+	.byte 0xeb, 5f - 4f
+4:
+	# retpoline space
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	preempt_disable_prefix
+	.byte 0xe8
+	.long save_relpoline_\reg - 5f
+5:
+.endm
+
+#define ARCH_REG_NAMES rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15
+
+.macro get_reg_num reg:req
+	i = 0
+.irp reg_it,ARCH_REG_NAMES
+	.ifc "\reg", "\reg_it"
+		reg_num=i
+	.endif
+	i = i+1
+.endr
+.endm
+
+.macro call v:vararg
+	retpoline = 0
+.irp reg_it,ARCH_REG_NAMES
+.ifc "\v", "__x86_indirect_thunk_\reg_it"
+	relpoline_call reg=\reg_it
+	retpoline = 1
+.endif
+.endr
+.if retpoline == 0
+	{disp8} call \v
+.endif
+.endm
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_NOSPEC_ALTERNATIVE				\
@@ -288,6 +387,26 @@ static inline void indirect_branch_prediction_barrier(void)
 	alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
 }
 
+/* Data structure that is used during the learning stage */
+struct relpoline_sample {
+	u32 src;
+	u32 dst;
+	u32 cnt;
+	u32 padding;
+} __packed;
+
+DECLARE_PER_CPU_ALIGNED(struct relpoline_sample[RELPOLINE_SAMPLES_NUM],
+		       relpoline_samples);
+
+/*
+ * Information for relpolines as it is saved in the source.
+ */
+struct relpoline_entry {
+	void *rip;
+	u8 reg;
+} __packed;
+
+
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8824d01c0c35..8a50d304093a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -138,6 +138,7 @@ obj-$(CONFIG_X86_INTEL_UMIP)		+= umip.o
 obj-$(CONFIG_UNWINDER_ORC)		+= unwind_orc.o
 obj-$(CONFIG_UNWINDER_FRAME_POINTER)	+= unwind_frame.o
 obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
+obj-$(CONFIG_RETPOLINE)			+= nospec-branch.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 72adf6c335dc..2db2628c79cd 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -18,6 +18,7 @@
 #include <asm/bootparam.h>
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -104,4 +105,9 @@ void common(void) {
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
 	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
+
+	/* Relpolines */
+	OFFSET(RELPOLINE_SAMPLE_src, relpoline_sample, src);
+	OFFSET(RELPOLINE_SAMPLE_dst, relpoline_sample, dst);
+	OFFSET(RELPOLINE_SAMPLE_cnt, relpoline_sample, cnt);
 }
diff --git a/arch/x86/kernel/macros.S b/arch/x86/kernel/macros.S
index 161c95059044..3d79f3d62d20 100644
--- a/arch/x86/kernel/macros.S
+++ b/arch/x86/kernel/macros.S
@@ -14,3 +14,4 @@
 #include <asm/asm.h>
 #include <asm/cpufeature.h>
 #include <asm/jump_label.h>
+#include <asm/nospec-branch.h>
diff --git a/arch/x86/kernel/nospec-branch.c b/arch/x86/kernel/nospec-branch.c
new file mode 100644
index 000000000000..b3027761442b
--- /dev/null
+++ b/arch/x86/kernel/nospec-branch.c
@@ -0,0 +1,5 @@
+#include <linux/percpu.h>
+#include <asm/nospec-branch.h>
+
+DEFINE_PER_CPU_ALIGNED(struct relpoline_sample[RELPOLINE_SAMPLES_NUM],
+		       relpoline_samples);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0d618ee634ac..c62735d06d58 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -355,6 +355,13 @@ SECTIONS
 	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
 		NOSAVE_DATA
 	}
+
+	. = ALIGN(8);
+	.relpolines : AT(ADDR(.relpolines) - LOAD_OFFSET) {
+		__relpolines = .;
+		*(.relpolines)
+		__relpolines_end = .;
+	}
 #endif
 
 	/* BSS */
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index c909961e678a..f30521c180db 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,6 +7,8 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
+#include <asm/asm-offsets.h>
+#include <asm/frame.h>
 
 .macro THUNK reg
 	.section .text.__x86.indirect_thunk
@@ -45,4 +47,77 @@ GENERATE_THUNK(r12)
 GENERATE_THUNK(r13)
 GENERATE_THUNK(r14)
 GENERATE_THUNK(r15)
+
+.macro save_relpoline reg:req
+ENTRY(save_relpoline_\reg\())
+	pushq 	%rdi
+	pushq	%rsi
+	pushq	%rcx
+
+	/* Load the destination first, in case %rsi is the destination register */
+.if "\reg" != "rdi"
+	mov	%\reg, %rdi
+.endif
+	mov	24(%rsp), %rsi
+
+	/* Compute the xor as an index in the table */
+	mov	%rsi, %rcx
+	xor	%rdi, %rcx
+	and	$RELPOLINE_SAMPLES_MASK, %ecx
+
+	/* Each entry is 16 bytes */
+	shl	$4, %ecx
+
+	movl	%esi, PER_CPU_VAR(relpoline_samples + RELPOLINE_SAMPLE_src)(%ecx)
+	movl	%edi, PER_CPU_VAR(relpoline_samples + RELPOLINE_SAMPLE_dst)(%ecx)
+	incl	PER_CPU_VAR(relpoline_samples + RELPOLINE_SAMPLE_cnt)(%ecx)
+
+#ifdef CACHEPOLINE_DEBUG
+	incl 	PER_CPU_VAR(relpoline_misses)
+#endif
+	popq	%rcx
+	popq	%rsi
+	popq	%rdi
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg\()),\
+		"jmp __x86_indirect_thunk_\reg",			\
+		X86_FEATURE_RETPOLINE
+
+ENDPROC(save_relpoline_\reg\())
+_ASM_NOKPROBE(save_relpoline_\reg\())
+EXPORT_SYMBOL(save_relpoline_\reg\())
+.endm
+
+.irp reg,ARCH_REG_NAMES
+.if \reg != "rsp"
+save_relpoline reg=\reg
+.endif
+.endr
+
+/*
+ * List of indirect thunks
+ */
+.pushsection .rodata
+.global indirect_thunks
+indirect_thunks:
+.irp reg,ARCH_REG_NAMES
+.if \reg != "rsp"
+.quad __x86_indirect_thunk_\reg
+.else
+.quad 0
+.endif
+.endr
+
+.global save_relpoline_funcs
+save_relpoline_funcs:
+.irp reg,ARCH_REG_NAMES
+.if \reg != "rsp"
+.quad save_relpoline_\reg
+.else
+.quad 0
+.endif
+.endr
+.popsection
+
+
 #endif
-- 
2.17.1



Thread overview: 43+ messages
2018-10-18  0:54 [RFC PATCH 0/5] x86: dynamic indirect call promotion Nadav Amit
2018-10-18  0:54 ` [RFC PATCH 1/5] x86: introduce preemption disable prefix Nadav Amit
2018-10-18  1:22   ` Andy Lutomirski
2018-10-18  3:12     ` Nadav Amit
2018-10-18  3:26       ` Nadav Amit
2018-10-18  3:51       ` Andy Lutomirski
2018-10-18 16:47         ` Nadav Amit
2018-10-18 17:00           ` Andy Lutomirski
2018-10-18 17:25             ` Nadav Amit
2018-10-18 17:29               ` Andy Lutomirski
2018-10-18 17:42                 ` Nadav Amit
2018-10-19  1:08             ` Nadav Amit
2018-10-19  4:29               ` Andy Lutomirski
2018-10-19  4:44                 ` Nadav Amit
2018-10-20  1:22                   ` Masami Hiramatsu
2018-10-19  5:00                 ` Alexei Starovoitov
2018-10-19  8:22                   ` Peter Zijlstra
2018-10-19 14:47                     ` Alexei Starovoitov
2018-10-19  8:19                 ` Peter Zijlstra
2018-10-19 10:38                 ` Oleg Nesterov
2018-10-19  8:33               ` Peter Zijlstra
2018-10-19 14:29                 ` Andy Lutomirski
2018-11-29  9:46                   ` Peter Zijlstra
2018-10-18  7:54     ` Peter Zijlstra
2018-10-18 18:14       ` Nadav Amit
2018-10-18  0:54 ` Nadav Amit [this message]
2018-10-18  0:54 ` [RFC PATCH 3/5] x86: interface for accessing indirect branch locations Nadav Amit
2018-10-18  0:54 ` [RFC PATCH 4/5] x86: learning and patching indirect branch targets Nadav Amit
2018-10-18  0:54 ` [RFC PATCH 5/5] x86: relpoline: disabling interface Nadav Amit
2018-10-23 18:36 ` [RFC PATCH 0/5] x86: dynamic indirect call promotion Dave Hansen
2018-10-23 20:32   ` Nadav Amit
2018-10-23 20:37     ` Dave Hansen
2018-11-28 16:08 ` Josh Poimboeuf
2018-11-28 19:34   ` Nadav Amit
2018-11-29  0:38     ` Josh Poimboeuf
2018-11-29  1:40       ` Andy Lutomirski
2018-11-29  2:06         ` Nadav Amit
2018-11-29  3:24           ` Andy Lutomirski
2018-11-29  4:36             ` Josh Poimboeuf
2018-11-29  6:06             ` Andy Lutomirski
2018-11-29 15:19               ` Josh Poimboeuf
2018-12-01  6:52                 ` Nadav Amit
2018-12-01 14:25                   ` Josh Poimboeuf
