All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 00/14] objtool vs retpoline
@ 2020-04-28 19:11 Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 01/14] objtool: Allow branches within the same alternative Peter Zijlstra
                   ` (17 more replies)
  0 siblings, 18 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

Hi,

Based on Alexandre's patches, here's a few that go on top of tip/objtool/core.

With these patches on objtool can completely understand retpolines and RSB
stuffing, which means it can emit valid ORC unwind information for them, which
in turn means we can now unwind through a retpoline.

New since last time:

 - 1-3, alternatives vs ORC unwind
 - 7-9: implement some suggestions from Julien
 - addressed feedback


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 01/14] objtool: Allow branches within the same alternative.
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-04-28 19:53   ` Josh Poimboeuf
  2020-04-28 19:11 ` [PATCH v2 02/14] objtool: Fix ORC vs alternatives Peter Zijlstra
                   ` (16 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

From: Alexandre Chartre <alexandre.chartre@oracle.com>

Currently objtool prevents any branch to an alternative. While preventing
branching from the outside to the middle of an alternative makes perfect
sense, branching within the same alternative should be allowed. To do so,
identify each alternative and check that a branch to an alternative comes
from the same alternative.

Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200414103618.12657-3-alexandre.chartre@oracle.com
---
 tools/objtool/check.c |   26 ++++++++++++++++++++------
 tools/objtool/check.h |    3 ++-
 2 files changed, 22 insertions(+), 7 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -770,7 +770,9 @@ static int handle_group_alt(struct objto
 			    struct instruction *orig_insn,
 			    struct instruction **new_insn)
 {
+	static unsigned int alt_group_next_index = 1;
 	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+	unsigned int alt_group = alt_group_next_index++;
 	unsigned long dest_off;
 
 	last_orig_insn = NULL;
@@ -779,7 +781,7 @@ static int handle_group_alt(struct objto
 		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
 			break;
 
-		insn->alt_group = true;
+		insn->alt_group = alt_group;
 		last_orig_insn = insn;
 	}
 
@@ -813,6 +815,7 @@ static int handle_group_alt(struct objto
 	}
 
 	last_new_insn = NULL;
+	alt_group = alt_group_next_index++;
 	insn = *new_insn;
 	sec_for_each_insn_from(file, insn) {
 		if (insn->offset >= special_alt->new_off + special_alt->new_len)
@@ -822,6 +825,7 @@ static int handle_group_alt(struct objto
 
 		insn->ignore = orig_insn->ignore_alts;
 		insn->func = orig_insn->func;
+		insn->alt_group = alt_group;
 
 		/*
 		 * Since alternative replacement code is copy/pasted by the
@@ -2163,6 +2167,15 @@ static int validate_return(struct symbol
 	return 0;
 }
 
+static bool is_branch_to_alternative(struct instruction *from,
+				     struct instruction *to)
+{
+	if (!from || !to->alt_group || !list_empty(&to->alts))
+		return false;
+
+	return (from->alt_group != to->alt_group);
+}
+
 /*
  * Follow the branch starting at the given instruction, and recursively follow
  * any other branches (jumps).  Meanwhile, track the frame pointer state at
@@ -2170,6 +2183,7 @@ static int validate_return(struct symbol
  * tools/objtool/Documentation/stack-validation.txt.
  */
 static int validate_branch(struct objtool_file *file, struct symbol *func,
+			   struct instruction *from,
 			   struct instruction *insn, struct insn_state state)
 {
 	struct alternative *alt;
@@ -2180,7 +2194,7 @@ static int validate_branch(struct objtoo
 
 	sec = insn->sec;
 
-	if (insn->alt_group && list_empty(&insn->alts)) {
+	if (is_branch_to_alternative(from, insn)) {
 		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
 			  sec, insn->offset);
 		return 1;
@@ -2227,7 +2241,7 @@ static int validate_branch(struct objtoo
 				if (alt->skip_orig)
 					skip_orig = true;
 
-				ret = validate_branch(file, func, alt->insn, state);
+				ret = validate_branch(file, func, NULL, alt->insn, state);
 				if (ret) {
 					if (backtrace)
 						BT_FUNC("(alt)", insn);
@@ -2271,7 +2285,7 @@ static int validate_branch(struct objtoo
 
 			} else if (insn->jump_dest) {
 				ret = validate_branch(file, func,
-						      insn->jump_dest, state);
+						      insn, insn->jump_dest, state);
 				if (ret) {
 					if (backtrace)
 						BT_FUNC("(branch)", insn);
@@ -2402,7 +2416,7 @@ static int validate_unwind_hints(struct
 
 	while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
 		if (insn->hint && !insn->visited) {
-			ret = validate_branch(file, insn->func, insn, state);
+			ret = validate_branch(file, insn->func, NULL, insn, state);
 			if (ret && backtrace)
 				BT_FUNC("<=== (hint)", insn);
 			warnings += ret;
@@ -2543,7 +2557,7 @@ static int validate_symbol(struct objtoo
 
 	state->uaccess = sym->uaccess_safe;
 
-	ret = validate_branch(file, insn->func, insn, *state);
+	ret = validate_branch(file, insn->func, NULL, insn, *state);
 	if (ret && backtrace)
 		BT_FUNC("<=== (sym)", insn);
 	return ret;
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -30,12 +30,13 @@ struct instruction {
 	unsigned int len;
 	enum insn_type type;
 	unsigned long immediate;
-	bool alt_group, dead_end, ignore, ignore_alts;
+	bool dead_end, ignore, ignore_alts;
 	bool hint;
 	bool retpoline_safe;
 	s8 instr;
 	u8 visited;
 	u8 ret_offset;
+	int alt_group;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 02/14] objtool: Fix ORC vs alternatives
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 01/14] objtool: Allow branches within the same alternative Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-04-28 19:55   ` Josh Poimboeuf
                     ` (2 more replies)
  2020-04-28 19:11 ` [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives Peter Zijlstra
                   ` (15 subsequent siblings)
  17 siblings, 3 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz, Jann Horn

Jann reported that (for instance) entry_64.o:general_protection has
very odd ORC data:

  0000000000000f40 <general_protection>:
  #######sp:sp+8 bp:(und) type:iret end:0
    f40:       90                      nop
  #######sp:(und) bp:(und) type:call end:0
    f41:       90                      nop
    f42:       90                      nop
  #######sp:sp+8 bp:(und) type:iret end:0
    f43:       e8 a8 01 00 00          callq  10f0 <error_entry>
  #######sp:sp+0 bp:(und) type:regs end:0
    f48:       f6 84 24 88 00 00 00    testb  $0x3,0x88(%rsp)
    f4f:       03
    f50:       74 00                   je     f52 <general_protection+0x12>
    f52:       48 89 e7                mov    %rsp,%rdi
    f55:       48 8b 74 24 78          mov    0x78(%rsp),%rsi
    f5a:       48 c7 44 24 78 ff ff    movq   $0xffffffffffffffff,0x78(%rsp)
    f61:       ff ff
    f63:       e8 00 00 00 00          callq  f68 <general_protection+0x28>
    f68:       e9 73 02 00 00          jmpq   11e0 <error_exit>
  #######sp:(und) bp:(und) type:call end:0
    f6d:       0f 1f 00                nopl   (%rax)

Note the entry at 0xf41. Josh found this was the result of commit:

  764eef4b109a ("objtool: Rewrite alt->skip_orig")

Due to the early return in validate_branch() we no longer set
insn->cfi of the original instruction stream (the NOPs at 0xf41 and
0xf42) and we'll end up with the above weirdness.

In other discussions we realized alternatives should be ORC invariant;
that is, due to there being only a single ORC table, it must be valid
for all alternatives. The easiest way to ensure this is to not allow
any stack modifications in alternatives.

When we enforce this latter observation, we get the property that the
whole alternative must have the same CFI, which we can employ to fix
the former report.

Fixes: 764eef4b109a ("objtool: Rewrite alt->skip_orig")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/Documentation/stack-validation.txt |    7 ++++
 tools/objtool/check.c                            |   34 ++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -315,6 +315,13 @@ they mean, and suggestions for how to fi
       function tracing inserts additional calls, which is not obvious from the
       sources).
 
+10. file.o: warning: func()+0x5c: alternative modifies stack
+
+    This means that an alternative includes instructions that modify the
+    stack. The problem is that there is only one ORC unwind table, this means
+    that the ORC unwind entries must be valid for each of the alternatives.
+    The easiest way to enforce this is to ensure alternative do not contain
+    any ORC entries, which in turn implies the above constraint.
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2001,6 +2001,11 @@ static int handle_insn_ops(struct instru
 	list_for_each_entry(op, &insn->stack_ops, list) {
 		int res;
 
+		if (insn->alt_group) {
+			WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
+			return -1;
+		}
+
 		res = update_cfi_state(insn, &state->cfi, op);
 		if (res)
 			return res;
@@ -2177,6 +2182,30 @@ static bool is_branch_to_alternative(str
 }
 
 /*
+ * Alternatives should not contain any ORC entries, this in turn means they
+ * should not contain any CFI ops, which implies all instructions should have
+ * the same same CFI state.
+ *
+ * It is possible to constuct alternatives that have unreachable holes that go
+ * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
+ * states which then results in ORC entries, which we just said we didn't want.
+ *
+ * Avoid them by copying the CFI entry of the first instruction into the whole
+ * alternative.
+ */
+static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+{
+	struct instruction *first_insn = insn;
+	int alt_group = insn->alt_group;
+
+	sec_for_each_insn_continue(file, insn) {
+		if (insn->alt_group != alt_group)
+			break;
+		insn->cfi = first_insn->cfi;
+	}
+}
+
+/*
  * Follow the branch starting at the given instruction, and recursively follow
  * any other branches (jumps).  Meanwhile, track the frame pointer state at
  * each instruction and validate all the rules described in
@@ -2234,7 +2263,7 @@ static int validate_branch(struct objtoo
 
 		insn->visited |= visited;
 
-		if (!insn->ignore_alts) {
+		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
 			bool skip_orig = false;
 
 			list_for_each_entry(alt, &insn->alts, list) {
@@ -2249,6 +2278,9 @@ static int validate_branch(struct objtoo
 				}
 			}
 
+			if (insn->alt_group)
+				fill_alternative_cfi(file, insn);
+
 			if (skip_orig)
 				return 0;
 		}



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 01/14] objtool: Allow branches within the same alternative Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 02/14] objtool: Fix ORC vs alternatives Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-04-29  0:54   ` Brian Gerst
  2020-04-28 19:11 ` [PATCH v2 04/14] objtool: is_fentry_call() crashes if call has no destination Peter Zijlstra
                   ` (14 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

As reported by objtool:

  lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative modifies stack
  lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative modifies stack

the smap_{save,restore}() alternatives violate (the newly enforced)
rule on stack invariance. That is, due to there only being a single
ORC table it must be valid to any alternative. These alternatives
violate this with the direct result that unwinds will not be correct
in between these calls.

[ In specific, since we force SMAP on for objtool, running on !SMAP
hardware will observe a different stack-layout and the ORC unwinder
will stumble. ]

So rewrite the functions to unconditionally save/restore the flags,
which gives an invariant stack layout irrespective of the SMAP state.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/smap.h |   11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -57,16 +57,19 @@ static __always_inline unsigned long sma
 {
 	unsigned long flags;
 
-	asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
-				  X86_FEATURE_SMAP)
-		      : "=rm" (flags) : : "memory", "cc");
+	asm volatile ("# smap_save\n\t"
+		      "pushf; pop %0"
+		      : "=rm" (flags) : : "memory");
+
+	clac();
 
 	return flags;
 }
 
 static __always_inline void smap_restore(unsigned long flags)
 {
-	asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+	asm volatile ("# smap_restore\n\t"
+		      "push %0; popf"
 		      : : "g" (flags) : "memory", "cc");
 }
 



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 04/14] objtool: is_fentry_call() crashes if call has no destination
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (2 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 05/14] objtool: UNWIND_HINT_RET_OFFSET should not check registers Peter Zijlstra
                   ` (13 subsequent siblings)
  17 siblings, 0 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

From: Alexandre Chartre <alexandre.chartre@oracle.com>

Fix is_fentry_call() so that it works if a call has no destination
set (call_dest). This needs to be done in order to support intra-
function calls.

Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200414103618.12657-2-alexandre.chartre@oracle.com
---
 tools/objtool/check.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1490,7 +1490,7 @@ static int decode_sections(struct objtoo
 
 static bool is_fentry_call(struct instruction *insn)
 {
-	if (insn->type == INSN_CALL &&
+	if (insn->type == INSN_CALL && insn->call_dest &&
 	    insn->call_dest->type == STT_NOTYPE &&
 	    !strcmp(insn->call_dest->name, "__fentry__"))
 		return true;



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 05/14] objtool: UNWIND_HINT_RET_OFFSET should not check registers
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (3 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 04/14] objtool: is_fentry_call() crashes if call has no destination Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 06/14] objtool: Rework allocating stack_ops on decode Peter Zijlstra
                   ` (12 subsequent siblings)
  17 siblings, 0 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

From: Alexandre Chartre <alexandre.chartre@oracle.com>

UNWIND_HINT_RET_OFFSET will adjust a modified stack. However if a
callee-saved register was pushed on the stack then the stack frame
will still appear modified. So stop checking registers when
UNWIND_HINT_RET_OFFSET is used.

Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200407073142.20659-3-alexandre.chartre@oracle.com
---
 tools/objtool/check.c |    8 ++++++++
 1 file changed, 8 insertions(+)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1513,6 +1513,14 @@ static bool has_modified_stack_frame(str
 	if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
 		return true;
 
+	/*
+	 * If there is a ret offset hint then don't check registers
+	 * because a callee-saved register might have been pushed on
+	 * the stack.
+	 */
+	if (ret_offset)
+		return false;
+
 	for (i = 0; i < CFI_NUM_REGS; i++) {
 		if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
 		    cfi->regs[i].offset != initial_func_cfi.regs[i].offset)



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 06/14] objtool: Rework allocating stack_ops on decode
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (4 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 05/14] objtool: UNWIND_HINT_RET_OFFSET should not check registers Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 07/14] objtool: Make handle_insn_ops() unconditional Peter Zijlstra
                   ` (11 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

Wrap each stack_op in a macro that allocates and adds it to the list.
This simplifies trying to figure out what to do with the pre-allocated
stack_op at the end.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 tools/objtool/arch/x86/decode.c |  251 +++++++++++++++++++++++-----------------
 1 file changed, 147 insertions(+), 104 deletions(-)

--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -77,6 +77,11 @@ unsigned long arch_jump_destination(stru
 	return insn->offset + insn->len + insn->immediate;
 }
 
+#define ADD_OP(op) \
+	if (!(op = calloc(1, sizeof(*op)))) \
+		return -1; \
+	else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+
 int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, enum insn_type *type,
@@ -88,7 +93,7 @@ int arch_decode_instruction(const struct
 	unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
 		      rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
 		      modrm_reg = 0, sib = 0;
-	struct stack_op *op;
+	struct stack_op *op = NULL;
 
 	x86_64 = is_x86_64(elf);
 	if (x86_64 == -1)
@@ -129,10 +134,6 @@ int arch_decode_instruction(const struct
 	if (insn.sib.nbytes)
 		sib = insn.sib.bytes[0];
 
-	op = calloc(1, sizeof(*op));
-	if (!op)
-		return -1;
-
 	switch (op1) {
 
 	case 0x1:
@@ -141,10 +142,12 @@ int arch_decode_instruction(const struct
 
 			/* add/sub reg, %rsp */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 		}
 		break;
 
@@ -152,9 +155,11 @@ int arch_decode_instruction(const struct
 
 		/* push reg */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_REG;
-		op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
-		op->dest.type = OP_DEST_PUSH;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+			op->dest.type = OP_DEST_PUSH;
+		}
 
 		break;
 
@@ -162,9 +167,11 @@ int arch_decode_instruction(const struct
 
 		/* pop reg */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_POP;
-		op->dest.type = OP_DEST_REG;
-		op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+		ADD_OP(op) {
+			op->src.type = OP_SRC_POP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+		}
 
 		break;
 
@@ -172,8 +179,10 @@ int arch_decode_instruction(const struct
 	case 0x6a:
 		/* push immediate */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_CONST;
-		op->dest.type = OP_DEST_PUSH;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+		}
 		break;
 
 	case 0x70 ... 0x7f:
@@ -188,11 +197,13 @@ int arch_decode_instruction(const struct
 		if (modrm == 0xe4) {
 			/* and imm, %rsp */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_AND;
-			op->src.reg = CFI_SP;
-			op->src.offset = insn.immediate.value;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_AND;
+				op->src.reg = CFI_SP;
+				op->src.offset = insn.immediate.value;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 			break;
 		}
 
@@ -205,11 +216,13 @@ int arch_decode_instruction(const struct
 
 		/* add/sub imm, %rsp */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_ADD;
-		op->src.reg = CFI_SP;
-		op->src.offset = insn.immediate.value * sign;
-		op->dest.type = OP_DEST_REG;
-		op->dest.reg = CFI_SP;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.immediate.value * sign;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+		}
 		break;
 
 	case 0x89:
@@ -217,10 +230,12 @@ int arch_decode_instruction(const struct
 
 			/* mov %rsp, reg */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG;
-			op->src.reg = CFI_SP;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG;
+				op->src.reg = CFI_SP;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+			}
 			break;
 		}
 
@@ -228,10 +243,12 @@ int arch_decode_instruction(const struct
 
 			/* mov reg, %rsp */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG;
-			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG;
+				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 			break;
 		}
 
@@ -242,21 +259,25 @@ int arch_decode_instruction(const struct
 
 			/* mov reg, disp(%rbp) */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG;
-			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_DEST_REG_INDIRECT;
-			op->dest.reg = CFI_BP;
-			op->dest.offset = insn.displacement.value;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG;
+				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.type = OP_DEST_REG_INDIRECT;
+				op->dest.reg = CFI_BP;
+				op->dest.offset = insn.displacement.value;
+			}
 
 		} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
 
 			/* mov reg, disp(%rsp) */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG;
-			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_DEST_REG_INDIRECT;
-			op->dest.reg = CFI_SP;
-			op->dest.offset = insn.displacement.value;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG;
+				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.type = OP_DEST_REG_INDIRECT;
+				op->dest.reg = CFI_SP;
+				op->dest.offset = insn.displacement.value;
+			}
 		}
 
 		break;
@@ -266,22 +287,26 @@ int arch_decode_instruction(const struct
 
 			/* mov disp(%rbp), reg */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG_INDIRECT;
-			op->src.reg = CFI_BP;
-			op->src.offset = insn.displacement.value;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG_INDIRECT;
+				op->src.reg = CFI_BP;
+				op->src.offset = insn.displacement.value;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			}
 
 		} else if (rex_w && !rex_b && sib == 0x24 &&
 			   modrm_mod != 3 && modrm_rm == 4) {
 
 			/* mov disp(%rsp), reg */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG_INDIRECT;
-			op->src.reg = CFI_SP;
-			op->src.offset = insn.displacement.value;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG_INDIRECT;
+				op->src.reg = CFI_SP;
+				op->src.offset = insn.displacement.value;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			}
 		}
 
 		break;
@@ -290,27 +315,31 @@ int arch_decode_instruction(const struct
 		if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
 
 			*type = INSN_STACK;
-			if (!insn.displacement.value) {
-				/* lea (%rsp), reg */
-				op->src.type = OP_SRC_REG;
-			} else {
-				/* lea disp(%rsp), reg */
-				op->src.type = OP_SRC_ADD;
-				op->src.offset = insn.displacement.value;
+			ADD_OP(op) {
+				if (!insn.displacement.value) {
+					/* lea (%rsp), reg */
+					op->src.type = OP_SRC_REG;
+				} else {
+					/* lea disp(%rsp), reg */
+					op->src.type = OP_SRC_ADD;
+					op->src.offset = insn.displacement.value;
+				}
+				op->src.reg = CFI_SP;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
 			}
-			op->src.reg = CFI_SP;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
 
 		} else if (rex == 0x48 && modrm == 0x65) {
 
 			/* lea disp(%rbp), %rsp */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_BP;
-			op->src.offset = insn.displacement.value;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_BP;
+				op->src.offset = insn.displacement.value;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 
 		} else if (rex == 0x49 && modrm == 0x62 &&
 			   insn.displacement.value == -8) {
@@ -322,11 +351,13 @@ int arch_decode_instruction(const struct
 			 * stack realignment.
 			 */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_R10;
-			op->src.offset = -8;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_R10;
+				op->src.offset = -8;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 
 		} else if (rex == 0x49 && modrm == 0x65 &&
 			   insn.displacement.value == -16) {
@@ -338,11 +369,13 @@ int arch_decode_instruction(const struct
 			 * stack realignment.
 			 */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_R13;
-			op->src.offset = -16;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_R13;
+				op->src.offset = -16;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 		}
 
 		break;
@@ -350,8 +383,10 @@ int arch_decode_instruction(const struct
 	case 0x8f:
 		/* pop to mem */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_POP;
-		op->dest.type = OP_DEST_MEM;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_POP;
+			op->dest.type = OP_DEST_MEM;
+		}
 		break;
 
 	case 0x90:
@@ -361,15 +396,19 @@ int arch_decode_instruction(const struct
 	case 0x9c:
 		/* pushf */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_CONST;
-		op->dest.type = OP_DEST_PUSHF;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSHF;
+		}
 		break;
 
 	case 0x9d:
 		/* popf */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_POPF;
-		op->dest.type = OP_DEST_MEM;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_POPF;
+			op->dest.type = OP_DEST_MEM;
+		}
 		break;
 
 	case 0x0f:
@@ -405,15 +444,19 @@ int arch_decode_instruction(const struct
 
 			/* push fs/gs */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_CONST;
-			op->dest.type = OP_DEST_PUSH;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_CONST;
+				op->dest.type = OP_DEST_PUSH;
+			}
 
 		} else if (op2 == 0xa1 || op2 == 0xa9) {
 
 			/* pop fs/gs */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_POP;
-			op->dest.type = OP_DEST_MEM;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_POP;
+				op->dest.type = OP_DEST_MEM;
+			}
 		}
 
 		break;
@@ -427,7 +470,8 @@ int arch_decode_instruction(const struct
 		 * pop bp
 		 */
 		*type = INSN_STACK;
-		op->dest.type = OP_DEST_LEAVE;
+		ADD_OP(op)
+			op->dest.type = OP_DEST_LEAVE;
 
 		break;
 
@@ -449,12 +493,14 @@ int arch_decode_instruction(const struct
 	case 0xcf: /* iret */
 		*type = INSN_EXCEPTION_RETURN;
 
-		/* add $40, %rsp */
-		op->src.type = OP_SRC_ADD;
-		op->src.reg = CFI_SP;
-		op->src.offset = 5*8;
-		op->dest.type = OP_DEST_REG;
-		op->dest.reg = CFI_SP;
+		ADD_OP(op) {
+			/* add $40, %rsp */
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = 5*8;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+		}
 		break;
 
 	case 0xca: /* retf */
@@ -492,8 +538,10 @@ int arch_decode_instruction(const struct
 
 			/* push from mem */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_CONST;
-			op->dest.type = OP_DEST_PUSH;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_CONST;
+				op->dest.type = OP_DEST_PUSH;
+			}
 		}
 
 		break;
@@ -504,11 +552,6 @@ int arch_decode_instruction(const struct
 
 	*immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
 
-	if (*type == INSN_STACK || *type == INSN_EXCEPTION_RETURN)
-		list_add_tail(&op->list, ops_list);
-	else
-		free(op);
-
 	return 0;
 }
 



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 07/14] objtool: Make handle_insn_ops() unconditional
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (5 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 06/14] objtool: Rework allocating stack_ops on decode Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 08/14] objtool: Remove INSN_STACK Peter Zijlstra
                   ` (10 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

Now that every instruction has a list of stack_ops; we can trivially
distinquish those instructions that do not have stack_ops, their list
is empty.

This means we can now call handle_insn_ops() unconditionally.

Suggested-by: Julien Thierry <jthierry@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c |    8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2247,6 +2247,9 @@ static int validate_branch(struct objtoo
 				return 0;
 		}
 
+		if (handle_insn_ops(insn, &state))
+			return 1;
+
 		switch (insn->type) {
 
 		case INSN_RETURN:
@@ -2306,9 +2309,6 @@ static int validate_branch(struct objtoo
 			break;
 
 		case INSN_EXCEPTION_RETURN:
-			if (handle_insn_ops(insn, &state))
-				return 1;
-
 			/*
 			 * This handles x86's sync_core() case, where we use an
 			 * IRET to self. All 'normal' IRET instructions are in
@@ -2328,8 +2328,6 @@ static int validate_branch(struct objtoo
 			return 0;
 
 		case INSN_STACK:
-			if (handle_insn_ops(insn, &state))
-				return 1;
 			break;
 
 		case INSN_STAC:



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 08/14] objtool: Remove INSN_STACK
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (6 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 07/14] objtool: Make handle_insn_ops() unconditional Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 09/14] objtool: Move the IRET hack into the arch decoder Peter Zijlstra
                   ` (9 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

With the unconditional use of handle_insn_ops(), INSN_STACK has lost
its purpose. Remove it.

Suggested-by: Julien Thierry <jthierry@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/arch.h            |    1 -
 tools/objtool/arch/x86/decode.c |   23 -----------------------
 tools/objtool/check.c           |    3 ---
 3 files changed, 27 deletions(-)

--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -21,7 +21,6 @@ enum insn_type {
 	INSN_RETURN,
 	INSN_EXCEPTION_RETURN,
 	INSN_CONTEXT_SWITCH,
-	INSN_STACK,
 	INSN_BUG,
 	INSN_NOP,
 	INSN_STAC,
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -141,7 +141,6 @@ int arch_decode_instruction(const struct
 		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
 
 			/* add/sub reg, %rsp */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
 				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
@@ -154,7 +153,6 @@ int arch_decode_instruction(const struct
 	case 0x50 ... 0x57:
 
 		/* push reg */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_REG;
 			op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
@@ -166,7 +164,6 @@ int arch_decode_instruction(const struct
 	case 0x58 ... 0x5f:
 
 		/* pop reg */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_POP;
 			op->dest.type = OP_DEST_REG;
@@ -178,7 +175,6 @@ int arch_decode_instruction(const struct
 	case 0x68:
 	case 0x6a:
 		/* push immediate */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_CONST;
 			op->dest.type = OP_DEST_PUSH;
@@ -196,7 +192,6 @@ int arch_decode_instruction(const struct
 
 		if (modrm == 0xe4) {
 			/* and imm, %rsp */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_AND;
 				op->src.reg = CFI_SP;
@@ -215,7 +210,6 @@ int arch_decode_instruction(const struct
 			break;
 
 		/* add/sub imm, %rsp */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_ADD;
 			op->src.reg = CFI_SP;
@@ -229,7 +223,6 @@ int arch_decode_instruction(const struct
 		if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
 
 			/* mov %rsp, reg */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
 				op->src.reg = CFI_SP;
@@ -242,7 +235,6 @@ int arch_decode_instruction(const struct
 		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
 
 			/* mov reg, %rsp */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
 				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
@@ -258,7 +250,6 @@ int arch_decode_instruction(const struct
 		    (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
 
 			/* mov reg, disp(%rbp) */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
 				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
@@ -270,7 +261,6 @@ int arch_decode_instruction(const struct
 		} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
 
 			/* mov reg, disp(%rsp) */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
 				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
@@ -286,7 +276,6 @@ int arch_decode_instruction(const struct
 		if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
 
 			/* mov disp(%rbp), reg */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_BP;
@@ -299,7 +288,6 @@ int arch_decode_instruction(const struct
 			   modrm_mod != 3 && modrm_rm == 4) {
 
 			/* mov disp(%rsp), reg */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_SP;
@@ -314,7 +302,6 @@ int arch_decode_instruction(const struct
 	case 0x8d:
 		if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
 
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				if (!insn.displacement.value) {
 					/* lea (%rsp), reg */
@@ -332,7 +319,6 @@ int arch_decode_instruction(const struct
 		} else if (rex == 0x48 && modrm == 0x65) {
 
 			/* lea disp(%rbp), %rsp */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
 				op->src.reg = CFI_BP;
@@ -350,7 +336,6 @@ int arch_decode_instruction(const struct
 			 * Restoring rsp back to its original value after a
 			 * stack realignment.
 			 */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
 				op->src.reg = CFI_R10;
@@ -368,7 +353,6 @@ int arch_decode_instruction(const struct
 			 * Restoring rsp back to its original value after a
 			 * stack realignment.
 			 */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
 				op->src.reg = CFI_R13;
@@ -382,7 +366,6 @@ int arch_decode_instruction(const struct
 
 	case 0x8f:
 		/* pop to mem */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_POP;
 			op->dest.type = OP_DEST_MEM;
@@ -395,7 +378,6 @@ int arch_decode_instruction(const struct
 
 	case 0x9c:
 		/* pushf */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_CONST;
 			op->dest.type = OP_DEST_PUSHF;
@@ -404,7 +386,6 @@ int arch_decode_instruction(const struct
 
 	case 0x9d:
 		/* popf */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_POPF;
 			op->dest.type = OP_DEST_MEM;
@@ -443,7 +424,6 @@ int arch_decode_instruction(const struct
 		} else if (op2 == 0xa0 || op2 == 0xa8) {
 
 			/* push fs/gs */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_CONST;
 				op->dest.type = OP_DEST_PUSH;
@@ -452,7 +432,6 @@ int arch_decode_instruction(const struct
 		} else if (op2 == 0xa1 || op2 == 0xa9) {
 
 			/* pop fs/gs */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_POP;
 				op->dest.type = OP_DEST_MEM;
@@ -469,7 +448,6 @@ int arch_decode_instruction(const struct
 		 * mov bp, sp
 		 * pop bp
 		 */
-		*type = INSN_STACK;
 		ADD_OP(op)
 			op->dest.type = OP_DEST_LEAVE;
 
@@ -537,7 +515,6 @@ int arch_decode_instruction(const struct
 		else if (modrm_reg == 6) {
 
 			/* push from mem */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_CONST;
 				op->dest.type = OP_DEST_PUSH;
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2327,9 +2327,6 @@ static int validate_branch(struct objtoo
 			}
 			return 0;
 
-		case INSN_STACK:
-			break;
-
 		case INSN_STAC:
 			if (state.uaccess) {
 				WARN_FUNC("recursive UACCESS enable", sec, insn->offset);



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 09/14] objtool: Move the IRET hack into the arch decoder
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (7 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 08/14] objtool: Remove INSN_STACK Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Miroslav Benes
  2020-04-28 19:11 ` [PATCH v2 10/14] objtool: Add support for intra-function calls Peter Zijlstra
                   ` (8 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

From: Miroslav Benes <mbenes@suse.cz>

Quoting Julien:

  "And the other suggestion is my other email was that you don't even
  need to add INSN_EXCEPTION_RETURN. You can keep IRET as
  INSN_CONTEXT_SWITCH by default and x86 decoder lookups the symbol
  conaining an iret. If it's a function symbol, it can just set the type
  to INSN_OTHER so that it caries on to the next instruction after
  having handled the stack_op."

Suggested-by: Julien Thierry <jthierry@redhat.com>
Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/arch.h            |    1 -
 tools/objtool/arch/x86/decode.c |   28 ++++++++++++++++++----------
 tools/objtool/check.c           |   11 -----------
 tools/objtool/elf.c             |    4 ++--
 tools/objtool/elf.h             |    2 +-
 5 files changed, 21 insertions(+), 25 deletions(-)

--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -19,7 +19,6 @@ enum insn_type {
 	INSN_CALL,
 	INSN_CALL_DYNAMIC,
 	INSN_RETURN,
-	INSN_EXCEPTION_RETURN,
 	INSN_CONTEXT_SWITCH,
 	INSN_BUG,
 	INSN_NOP,
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -94,6 +94,7 @@ int arch_decode_instruction(const struct
 		      rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
 		      modrm_reg = 0, sib = 0;
 	struct stack_op *op = NULL;
+	struct symbol *sym;
 
 	x86_64 = is_x86_64(elf);
 	if (x86_64 == -1)
@@ -469,17 +470,24 @@ int arch_decode_instruction(const struct
 		break;
 
 	case 0xcf: /* iret */
-		*type = INSN_EXCEPTION_RETURN;
-
-		ADD_OP(op) {
-			/* add $40, %rsp */
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_SP;
-			op->src.offset = 5*8;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+		/*
+		 * Handle sync_core(), which has an IRET to self.
+		 * All other IRET are in STT_NONE entry code.
+		 */
+		sym = find_symbol_containing(sec, offset);
+		if (sym && sym->type == STT_FUNC) {
+			ADD_OP(op) {
+				/* add $40, %rsp */
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_SP;
+				op->src.offset = 5*8;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
+			break;
 		}
-		break;
+
+		/* fallthrough */
 
 	case 0xca: /* retf */
 	case 0xcb: /* retf */
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2308,17 +2308,6 @@ static int validate_branch(struct objtoo
 
 			break;
 
-		case INSN_EXCEPTION_RETURN:
-			/*
-			 * This handles x86's sync_core() case, where we use an
-			 * IRET to self. All 'normal' IRET instructions are in
-			 * STT_NOTYPE entry symbols.
-			 */
-			if (func)
-				break;
-
-			return 0;
-
 		case INSN_CONTEXT_SWITCH:
 			if (func && (!next_insn || !next_insn->hint)) {
 				WARN_FUNC("unsupported instruction in callable function",
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -61,7 +61,7 @@ static void rb_add(struct rb_root *tree,
 	rb_insert_color(node, tree);
 }
 
-static struct rb_node *rb_find_first(struct rb_root *tree, const void *key,
+static struct rb_node *rb_find_first(const struct rb_root *tree, const void *key,
 			       int (*cmp)(const void *key, const struct rb_node *))
 {
 	struct rb_node *node = tree->rb_node;
@@ -189,7 +189,7 @@ struct symbol *find_func_by_offset(struc
 	return NULL;
 }
 
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
 {
 	struct rb_node *node;
 
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -124,7 +124,7 @@ struct section *find_section_by_name(con
 struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
 				     unsigned long offset, unsigned int len);



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 10/14] objtool: Add support for intra-function calls
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (8 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 09/14] objtool: Move the IRET hack into the arch decoder Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 11/14] x86/speculation: Change FILL_RETURN_BUFFER to work with objtool Peter Zijlstra
                   ` (7 subsequent siblings)
  17 siblings, 0 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

From: Alexandre Chartre <alexandre.chartre@oracle.com>

Change objtool to support intra-function calls. On x86, an intra-function
call is represented in objtool as a push onto the stack (of the return
address), and a jump to the destination address. That way the stack
information is correctly updated and the call flow is still accurate.

Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200414103618.12657-4-alexandre.chartre@oracle.com
---
 include/linux/frame.h                            |   11 +++
 tools/objtool/Documentation/stack-validation.txt |    8 ++
 tools/objtool/arch/x86/decode.c                  |    8 ++
 tools/objtool/check.c                            |   79 +++++++++++++++++++++--
 4 files changed, 102 insertions(+), 4 deletions(-)

--- a/include/linux/frame.h
+++ b/include/linux/frame.h
@@ -15,9 +15,20 @@
 	static void __used __section(.discard.func_stack_frame_non_standard) \
 		*__func_stack_frame_non_standard_##func = func
 
+/*
+ * This macro indicates that the following intra-function call is valid.
+ * Any non-annotated intra-function call will cause objtool to issue a warning.
+ */
+#define ANNOTATE_INTRA_FUNCTION_CALL				\
+	999:							\
+	.pushsection .discard.intra_function_calls;		\
+	.long 999b;						\
+	.popsection;
+
 #else /* !CONFIG_STACK_VALIDATION */
 
 #define STACK_FRAME_NON_STANDARD(func)
+#define ANNOTATE_INTRA_FUNCTION_CALL
 
 #endif /* CONFIG_STACK_VALIDATION */
 
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -323,6 +323,14 @@ they mean, and suggestions for how to fi
     The easiest way to enforce this is to ensure alternative do not contain
     any ORC entries, which in turn implies the above constraint.
 
+11. file.o: warning: unannotated intra-function call
+
+   This warning means that a direct call is done to a destination which
+   is not at the beginning of a function. If this is a legit call, you
+   can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
+   directive right before the call.
+
+
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.
 
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -496,6 +496,14 @@ int arch_decode_instruction(const struct
 
 	case 0xe8:
 		*type = INSN_CALL;
+		/*
+		 * For the impact on the stack, a CALL behaves like
+		 * a PUSH of an immediate value (the return address).
+		 */
+		ADD_OP(op) {
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+		}
 		break;
 
 	case 0xfc:
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -692,6 +692,16 @@ static int add_jump_destinations(struct
 	return 0;
 }
 
+static void remove_insn_ops(struct instruction *insn)
+{
+	struct stack_op *op, *tmp;
+
+	list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
+		list_del(&op->list);
+		free(op);
+	}
+}
+
 /*
  * Find the destination instructions for all calls.
  */
@@ -717,10 +727,7 @@ static int add_call_destinations(struct
 				continue;
 
 			if (!insn->call_dest) {
-				WARN_FUNC("unsupported intra-function call",
-					  insn->sec, insn->offset);
-				if (retpoline)
-					WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+				WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset);
 				return -1;
 			}
 
@@ -743,6 +750,15 @@ static int add_call_destinations(struct
 			}
 		} else
 			insn->call_dest = rela->sym;
+
+		/*
+		 * Whatever stack impact regular CALLs have, should be undone
+		 * by the RETURN of the called function.
+		 *
+		 * Annotated intra-function calls retain the stack_ops but
+		 * are converted to JUMP, see read_intra_function_calls().
+		 */
+		remove_insn_ops(insn);
 	}
 
 	return 0;
@@ -1422,6 +1438,57 @@ static int read_instr_hints(struct objto
 	return 0;
 }
 
+static int read_intra_function_calls(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct section *sec;
+	struct rela *rela;
+
+	sec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		unsigned long dest_off;
+
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s",
+			     sec->name);
+			return -1;
+		}
+
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!insn) {
+			WARN("bad .discard.intra_function_call entry");
+			return -1;
+		}
+
+		if (insn->type != INSN_CALL) {
+			WARN_FUNC("intra_function_call not a direct call",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		/*
+		 * Treat intra-function CALLs as JMPs, but with a stack_op.
+		 * See add_call_destinations(), which strips stack_ops from
+		 * normal CALLs.
+		 */
+		insn->type = INSN_JUMP_UNCONDITIONAL;
+
+		dest_off = insn->offset + insn->len + insn->immediate;
+		insn->jump_dest = find_insn(file, insn->sec, dest_off);
+		if (!insn->jump_dest) {
+			WARN_FUNC("can't find call dest at %s+0x%lx",
+				  insn->sec, insn->offset,
+				  insn->sec->name, dest_off);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 static void mark_rodata(struct objtool_file *file)
 {
 	struct section *sec;
@@ -1477,6 +1544,10 @@ static int decode_sections(struct objtoo
 	if (ret)
 		return ret;
 
+	ret = read_intra_function_calls(file);
+	if (ret)
+		return ret;
+
 	ret = add_call_destinations(file);
 	if (ret)
 		return ret;



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 11/14] x86/speculation: Change FILL_RETURN_BUFFER to work with objtool
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (9 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 10/14] objtool: Add support for intra-function calls Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 12/14] x86: Simplify retpoline declaration Peter Zijlstra
                   ` (6 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

Change FILL_RETURN_BUFFER so that objtool groks it and can generate
correct ORC unwind information.

 - Since ORC is alternative invariant; that is, all alternatives
   should have the same ORC entries, the __FILL_RETURN_BUFFER body
   can not be part of an alternative.

   Therefore, move it out of the alternative and keep the alternative
   as a sort of jump_label around it.

 - Use the ANNOTATE_INTRA_FUNCTION_CALL annotation to white-list
   these 'funny' call instructions to nowhere.

 - Use UNWIND_HINT_EMPTY to 'fill' the speculation traps, otherwise
   objtool will consider them unreachable.

 - Move the RSP adjustment into the loop, such that the loop has a
   deterministic stack layout.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 arch/x86/include/asm/nospec-branch.h |   16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -4,11 +4,13 @@
 #define _ASM_X86_NOSPEC_BRANCH_H_
 
 #include <linux/static_key.h>
+#include <linux/frame.h>
 
 #include <asm/alternative.h>
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
 
 /*
  * This should be used immediately before a retpoline alternative. It tells
@@ -46,21 +48,25 @@
 #define __FILL_RETURN_BUFFER(reg, nr, sp)	\
 	mov	$(nr/2), reg;			\
 771:						\
+	ANNOTATE_INTRA_FUNCTION_CALL;		\
 	call	772f;				\
 773:	/* speculation trap */			\
+	UNWIND_HINT_EMPTY;			\
 	pause;					\
 	lfence;					\
 	jmp	773b;				\
 772:						\
+	ANNOTATE_INTRA_FUNCTION_CALL;		\
 	call	774f;				\
 775:	/* speculation trap */			\
+	UNWIND_HINT_EMPTY;			\
 	pause;					\
 	lfence;					\
 	jmp	775b;				\
 774:						\
+	add	$(BITS_PER_LONG/8) * 2, sp;	\
 	dec	reg;				\
-	jnz	771b;				\
-	add	$(BITS_PER_LONG/8) * nr, sp;
+	jnz	771b;
 
 #ifdef __ASSEMBLY__
 
@@ -137,10 +143,8 @@
   */
 .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
-		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
-		\ftr
+	ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+	__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
 .Lskip_rsb_\@:
 #endif
 .endm



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 12/14] x86: Simplify retpoline declaration
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (10 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 11/14] x86/speculation: Change FILL_RETURN_BUFFER to work with objtool Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 13/14] x86: Change {JMP,CALL}_NOSPEC argument Peter Zijlstra
                   ` (5 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz, arnd

Because of how KSYM works, we need one declaration per line. Seeing
how we're going to be doubling the amount of retpoline symbols,
simplify the machinery in order to avoid having to copy/paste even
more.

Cc: arnd@arndb.de
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/GEN-for-each-reg.h |   25 ++++++++++++++++++++++
 arch/x86/include/asm/asm-prototypes.h   |   28 +++++++------------------
 arch/x86/lib/retpoline.S                |   35 +++++++++++++-------------------
 3 files changed, 48 insertions(+), 40 deletions(-)

--- /dev/null
+++ b/arch/x86/include/asm/GEN-for-each-reg.h
@@ -0,0 +1,25 @@
+#ifdef CONFIG_64BIT
+GEN(rax)
+GEN(rbx)
+GEN(rcx)
+GEN(rdx)
+GEN(rsi)
+GEN(rdi)
+GEN(rbp)
+GEN(r8)
+GEN(r9)
+GEN(r10)
+GEN(r11)
+GEN(r12)
+GEN(r13)
+GEN(r14)
+GEN(r15)
+#else
+GEN(eax)
+GEN(ebx)
+GEN(ecx)
+GEN(edx)
+GEN(esi)
+GEN(edi)
+GEN(ebp)
+#endif
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -17,24 +17,12 @@ extern void cmpxchg8b_emu(void);
 #endif
 
 #ifdef CONFIG_RETPOLINE
-#ifdef CONFIG_X86_32
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
-#else
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
-INDIRECT_THUNK(8)
-INDIRECT_THUNK(9)
-INDIRECT_THUNK(10)
-INDIRECT_THUNK(11)
-INDIRECT_THUNK(12)
-INDIRECT_THUNK(13)
-INDIRECT_THUNK(14)
-INDIRECT_THUNK(15)
-#endif
-INDIRECT_THUNK(ax)
-INDIRECT_THUNK(bx)
-INDIRECT_THUNK(cx)
-INDIRECT_THUNK(dx)
-INDIRECT_THUNK(si)
-INDIRECT_THUNK(di)
-INDIRECT_THUNK(bp)
+
+#define DECL_INDIRECT_THUNK(reg) \
+	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+
+#undef GEN
+#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+
 #endif /* CONFIG_RETPOLINE */
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -24,25 +24,20 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
  * only see one instance of "__x86_indirect_thunk_\reg" rather
  * than one per register with the correct names. So we do it
  * the simple and nasty way...
+ *
+ * Worse, you can only have a single EXPORT_SYMBOL per line,
+ * and CPP can't insert newlines, so we have to repeat everything
+ * at least twice.
  */
-#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
-#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
 
-GENERATE_THUNK(_ASM_AX)
-GENERATE_THUNK(_ASM_BX)
-GENERATE_THUNK(_ASM_CX)
-GENERATE_THUNK(_ASM_DX)
-GENERATE_THUNK(_ASM_SI)
-GENERATE_THUNK(_ASM_DI)
-GENERATE_THUNK(_ASM_BP)
-#ifdef CONFIG_64BIT
-GENERATE_THUNK(r8)
-GENERATE_THUNK(r9)
-GENERATE_THUNK(r10)
-GENERATE_THUNK(r11)
-GENERATE_THUNK(r12)
-GENERATE_THUNK(r13)
-GENERATE_THUNK(r14)
-GENERATE_THUNK(r15)
-#endif
+#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+
+#undef GEN
+#define GEN(reg) THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) EXPORT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 13/14] x86: Change {JMP,CALL}_NOSPEC argument
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (11 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 12/14] x86: Simplify retpoline declaration Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2020-04-28 19:11 ` [PATCH v2 14/14] x86/retpoline: Fix retpoline unwind Peter Zijlstra
                   ` (4 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

In order to change the {JMP,CALL}_NOSPEC macros to call out-of-line
versions of the retpoline magic, we need to remove the '%' from the
argument, such that we can paste it onto symbol names.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/crypto/aesni-intel_asm.S            |    4 ++--
 arch/x86/crypto/camellia-aesni-avx-asm_64.S  |    2 +-
 arch/x86/crypto/camellia-aesni-avx2-asm_64.S |    2 +-
 arch/x86/crypto/crc32c-pcl-intel-asm_64.S    |   26 +++++++++++++-------------
 arch/x86/entry/entry_32.S                    |    6 +++---
 arch/x86/entry/entry_64.S                    |    2 +-
 arch/x86/include/asm/nospec-branch.h         |   16 ++++++++--------
 arch/x86/kernel/ftrace_32.S                  |    2 +-
 arch/x86/kernel/ftrace_64.S                  |    4 ++--
 arch/x86/lib/checksum_32.S                   |    4 ++--
 arch/x86/platform/efi/efi_stub_64.S          |    2 +-
 11 files changed, 35 insertions(+), 35 deletions(-)

--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2758,7 +2758,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
 	pxor INC, STATE4
 	movdqu IV, 0x30(OUTP)
 
-	CALL_NOSPEC %r11
+	CALL_NOSPEC r11
 
 	movdqu 0x00(OUTP), INC
 	pxor INC, STATE1
@@ -2803,7 +2803,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
 	_aesni_gf128mul_x_ble()
 	movups IV, (IVP)
 
-	CALL_NOSPEC %r11
+	CALL_NOSPEC r11
 
 	movdqu 0x40(OUTP), INC
 	pxor INC, STATE1
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -1228,7 +1228,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_
 	vpxor 14 * 16(%rax), %xmm15, %xmm14;
 	vpxor 15 * 16(%rax), %xmm15, %xmm15;
 
-	CALL_NOSPEC %r9;
+	CALL_NOSPEC r9;
 
 	addq $(16 * 16), %rsp;
 
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -1339,7 +1339,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_
 	vpxor 14 * 32(%rax), %ymm15, %ymm14;
 	vpxor 15 * 32(%rax), %ymm15, %ymm15;
 
-	CALL_NOSPEC %r9;
+	CALL_NOSPEC r9;
 
 	addq $(16 * 32), %rsp;
 
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -75,7 +75,7 @@
 
 .text
 SYM_FUNC_START(crc_pcl)
-#define    bufp		%rdi
+#define    bufp		rdi
 #define    bufp_dw	%edi
 #define    bufp_w	%di
 #define    bufp_b	%dil
@@ -105,9 +105,9 @@ SYM_FUNC_START(crc_pcl)
 	## 1) ALIGN:
 	################################################################
 
-	mov     bufp, bufptmp		# rdi = *buf
-	neg     bufp
-	and     $7, bufp		# calculate the unalignment amount of
+	mov     %bufp, bufptmp		# rdi = *buf
+	neg     %bufp
+	and     $7, %bufp		# calculate the unalignment amount of
 					# the address
 	je      proc_block		# Skip if aligned
 
@@ -123,13 +123,13 @@ SYM_FUNC_START(crc_pcl)
 do_align:
 	#### Calculate CRC of unaligned bytes of the buffer (if any)
 	movq    (bufptmp), tmp		# load a quadward from the buffer
-	add     bufp, bufptmp		# align buffer pointer for quadword
+	add     %bufp, bufptmp		# align buffer pointer for quadword
 					# processing
-	sub     bufp, len		# update buffer length
+	sub     %bufp, len		# update buffer length
 align_loop:
 	crc32b  %bl, crc_init_dw 	# compute crc32 of 1-byte
 	shr     $8, tmp			# get next byte
-	dec     bufp
+	dec     %bufp
 	jne     align_loop
 
 proc_block:
@@ -169,10 +169,10 @@ SYM_FUNC_START(crc_pcl)
 	xor     crc2, crc2
 
 	## branch into array
-	lea	jump_table(%rip), bufp
-	movzxw  (bufp, %rax, 2), len
-	lea	crc_array(%rip), bufp
-	lea     (bufp, len, 1), bufp
+	lea	jump_table(%rip), %bufp
+	movzxw  (%bufp, %rax, 2), len
+	lea	crc_array(%rip), %bufp
+	lea     (%bufp, len, 1), %bufp
 	JMP_NOSPEC bufp
 
 	################################################################
@@ -218,9 +218,9 @@ LABEL crc_ %i
 	## 4) Combine three results:
 	################################################################
 
-	lea	(K_table-8)(%rip), bufp		# first entry is for idx 1
+	lea	(K_table-8)(%rip), %bufp		# first entry is for idx 1
 	shlq    $3, %rax			# rax *= 8
-	pmovzxdq (bufp,%rax), %xmm0		# 2 consts: K1:K2
+	pmovzxdq (%bufp,%rax), %xmm0		# 2 consts: K1:K2
 	leal	(%eax,%eax,2), %eax		# rax *= 3 (total *24)
 	subq    %rax, tmp			# tmp -= rax*24
 
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -816,7 +816,7 @@ SYM_CODE_START(ret_from_fork)
 
 	/* kernel thread */
 1:	movl	%edi, %eax
-	CALL_NOSPEC %ebx
+	CALL_NOSPEC ebx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
@@ -1501,7 +1501,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exce
 
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
-	CALL_NOSPEC %edi
+	CALL_NOSPEC edi
 	jmp	ret_from_exception
 SYM_CODE_END(common_exception_read_cr2)
 
@@ -1522,7 +1522,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exce
 
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
-	CALL_NOSPEC %edi
+	CALL_NOSPEC edi
 	jmp	ret_from_exception
 SYM_CODE_END(common_exception)
 
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -349,7 +349,7 @@ SYM_CODE_START(ret_from_fork)
 	/* kernel thread */
 	UNWIND_HINT_EMPTY
 	movq	%r12, %rdi
-	CALL_NOSPEC %rbx
+	CALL_NOSPEC rbx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -118,22 +118,22 @@
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg),	\
-		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE,	\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg),	\
+		__stringify(RETPOLINE_JMP %\reg), X86_FEATURE_RETPOLINE,\
+		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
-	jmp	*\reg
+	jmp	*%\reg
 #endif
 .endm
 
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg),	\
-		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg),\
+		__stringify(RETPOLINE_CALL %\reg), X86_FEATURE_RETPOLINE,\
+		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
-	call	*\reg
+	call	*%\reg
 #endif
 .endm
 
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -189,5 +189,5 @@ SYM_CODE_END(ftrace_graph_caller)
 	movl	%eax, %ecx
 	popl	%edx
 	popl	%eax
-	JMP_NOSPEC %ecx
+	JMP_NOSPEC ecx
 #endif
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -301,7 +301,7 @@ SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBA
 	 * function tracing is enabled.
 	 */
 	movq ftrace_trace_function, %r8
-	CALL_NOSPEC %r8
+	CALL_NOSPEC r8
 	restore_mcount_regs
 
 	jmp fgraph_trace
@@ -338,6 +338,6 @@ SYM_CODE_START(return_to_handler)
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $24, %rsp
-	JMP_NOSPEC %rdi
+	JMP_NOSPEC rdi
 SYM_CODE_END(return_to_handler)
 #endif
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -153,7 +153,7 @@ SYM_FUNC_START(csum_partial)
 	negl %ebx
 	lea 45f(%ebx,%ebx,2), %ebx
 	testl %esi, %esi
-	JMP_NOSPEC %ebx
+	JMP_NOSPEC ebx
 
 	# Handle 2-byte-aligned regions
 20:	addw (%esi), %ax
@@ -436,7 +436,7 @@ SYM_FUNC_START(csum_partial_copy_generic
 	andl $-32,%edx
 	lea 3f(%ebx,%ebx), %ebx
 	testl %esi, %esi 
-	JMP_NOSPEC %ebx
+	JMP_NOSPEC ebx
 1:	addl $64,%esi
 	addl $64,%edi 
 	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -21,7 +21,7 @@ SYM_FUNC_START(__efi_call)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
-	CALL_NOSPEC %rdi
+	CALL_NOSPEC rdi
 	leave
 	ret
 SYM_FUNC_END(__efi_call)



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 14/14] x86/retpoline: Fix retpoline unwind
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (12 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 13/14] x86: Change {JMP,CALL}_NOSPEC argument Peter Zijlstra
@ 2020-04-28 19:11 ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2020-04-28 20:17 ` [PATCH v2 00/14] objtool vs retpoline Josh Poimboeuf
                   ` (3 subsequent siblings)
  17 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-28 19:11 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre
  Cc: linux-kernel, jthierry, tglx, x86, mbenes, peterz

Currently objtool cannot understand retpolines, and thus cannot
generate ORC unwind information for them. This means that we cannot
unwind from the middle of a retpoline.

The recent ANNOTATE_INTRA_FUNCTION_CALL and UNWIND_HINT_RET_OFFSET
support in objtool enables it to understand the basic retpoline
construct. A further problem is that the ORC unwind information is
alternative invariant; IOW. every alternative should have the same
ORC, retpolines obviously violate this. This means we need to
out-of-line them.

Since all GCC generated code already uses out-of-line retpolines, this
should not affect performance much, if anything.

This will enable objtool to generate valid ORC data for the
out-of-line copies, which means we can correctly and reliably unwind
through a retpoline.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/asm-prototypes.h |    7 ++++
 arch/x86/include/asm/nospec-branch.h  |   56 ++++------------------------------
 arch/x86/lib/retpoline.S              |   26 +++++++++++++--
 3 files changed, 38 insertions(+), 51 deletions(-)

--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -21,8 +21,15 @@ extern void cmpxchg8b_emu(void);
 #define DECL_INDIRECT_THUNK(reg) \
 	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
 
+#define DECL_RETPOLINE(reg) \
+	extern asmlinkage void __x86_retpoline_ ## reg (void);
+
 #undef GEN
 #define GEN(reg) DECL_INDIRECT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
+#undef GEN
+#define GEN(reg) DECL_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>
+
 #endif /* CONFIG_RETPOLINE */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -13,15 +13,6 @@
 #include <asm/unwind_hints.h>
 
 /*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
- */
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
-	ANNOTATE_IGNORE_ALTERNATIVE
-
-/*
  * Fill the CPU return stack buffer.
  *
  * Each entry in the RSB, if used for a speculative 'ret', contains an
@@ -83,44 +74,15 @@
 .endm
 
 /*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
- */
-.macro RETPOLINE_JMP reg:req
-	call	.Ldo_rop_\@
-.Lspec_trap_\@:
-	pause
-	lfence
-	jmp	.Lspec_trap_\@
-.Ldo_rop_\@:
-	mov	\reg, (%_ASM_SP)
-	ret
-.endm
-
-/*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
- */
-.macro RETPOLINE_CALL reg:req
-	jmp	.Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
-	RETPOLINE_JMP \reg
-.Ldo_call_\@:
-	call	.Ldo_retpoline_jmp_\@
-.endm
-
-/*
  * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
  * indirect jmp/call which may be susceptible to the Spectre variant 2
  * attack.
  */
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg),	\
-		__stringify(RETPOLINE_JMP %\reg), X86_FEATURE_RETPOLINE,\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+		      __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	jmp	*%\reg
 #endif
@@ -128,10 +90,9 @@
 
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg),\
-		__stringify(RETPOLINE_CALL %\reg), X86_FEATURE_RETPOLINE,\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+		      __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	call	*%\reg
 #endif
@@ -165,16 +126,16 @@
  * which is ensured when CONFIG_RETPOLINE is defined.
  */
 # define CALL_NOSPEC						\
-	ANNOTATE_NOSPEC_ALTERNATIVE				\
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
+	"call __x86_retpoline_%V[thunk_target]\n",		\
 	X86_FEATURE_RETPOLINE,					\
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
 	X86_FEATURE_RETPOLINE_AMD)
+
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
 #else /* CONFIG_X86_32 */
@@ -184,7 +145,6 @@
  * here, anyway.
  */
 # define CALL_NOSPEC						\
-	ANNOTATE_NOSPEC_ALTERNATIVE				\
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,15 +7,31 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
+#include <asm/frame.h>
 
 .macro THUNK reg
 	.section .text.__x86.indirect_thunk
 
+	.align 32
 SYM_FUNC_START(__x86_indirect_thunk_\reg)
-	CFI_STARTPROC
-	JMP_NOSPEC %\reg
-	CFI_ENDPROC
+	JMP_NOSPEC \reg
 SYM_FUNC_END(__x86_indirect_thunk_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call	.Ldo_rop_\@
+.Lspec_trap_\@:
+	UNWIND_HINT_EMPTY
+	pause
+	lfence
+	jmp	.Lspec_trap_\@
+.Ldo_rop_\@:
+	mov	%\reg, (%_ASM_SP)
+	UNWIND_HINT_RET_OFFSET
+	ret
+SYM_FUNC_END(__x86_retpoline_\reg)
+
 .endm
 
 /*
@@ -32,6 +48,7 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 
 #define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
 #define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define EXPORT_RETPOLINE(reg)  __EXPORT_THUNK(__x86_retpoline_ ## reg)
 
 #undef GEN
 #define GEN(reg) THUNK reg
@@ -41,3 +58,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 #define GEN(reg) EXPORT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
+#undef GEN
+#define GEN(reg) EXPORT_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 01/14] objtool: Allow branches within the same alternative.
  2020-04-28 19:11 ` [PATCH v2 01/14] objtool: Allow branches within the same alternative Peter Zijlstra
@ 2020-04-28 19:53   ` Josh Poimboeuf
  0 siblings, 0 replies; 39+ messages in thread
From: Josh Poimboeuf @ 2020-04-28 19:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: alexandre.chartre, linux-kernel, jthierry, tglx, x86, mbenes

On Tue, Apr 28, 2020 at 09:11:02PM +0200, Peter Zijlstra wrote:
> From: Alexandre Chartre <alexandre.chartre@oracle.com>
> 
> Currently objtool prevents any branch to an alternative. While preventing
> branching from the outside to the middle of an alternative makes perfect
> sense, branching within the same alternative should be allowed. To do so,
> identify each alternative and check that a branch to an alternative comes
> from the same alternative.
> 
> Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Link: https://lkml.kernel.org/r/20200414103618.12657-3-alexandre.chartre@oracle.com

I still don't like this patch, other than the "alt_group =
alt_group_next_index++" thing as a prerequisite for the next patch.

Instead of all this, let's just get rid of the 

  "don't know how to handle branch to middle of alternative instruction group"

warning, which I think we decided is useless and not worth the effort
anyway.

> ---
>  tools/objtool/check.c |   26 ++++++++++++++++++++------
>  tools/objtool/check.h |    3 ++-
>  2 files changed, 22 insertions(+), 7 deletions(-)
> 
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -770,7 +770,9 @@ static int handle_group_alt(struct objto
>  			    struct instruction *orig_insn,
>  			    struct instruction **new_insn)
>  {
> +	static unsigned int alt_group_next_index = 1;
>  	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
> +	unsigned int alt_group = alt_group_next_index++;
>  	unsigned long dest_off;
>  
>  	last_orig_insn = NULL;
> @@ -779,7 +781,7 @@ static int handle_group_alt(struct objto
>  		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
>  			break;
>  
> -		insn->alt_group = true;
> +		insn->alt_group = alt_group;
>  		last_orig_insn = insn;
>  	}
>  
> @@ -813,6 +815,7 @@ static int handle_group_alt(struct objto
>  	}
>  
>  	last_new_insn = NULL;
> +	alt_group = alt_group_next_index++;
>  	insn = *new_insn;
>  	sec_for_each_insn_from(file, insn) {
>  		if (insn->offset >= special_alt->new_off + special_alt->new_len)
> @@ -822,6 +825,7 @@ static int handle_group_alt(struct objto
>  
>  		insn->ignore = orig_insn->ignore_alts;
>  		insn->func = orig_insn->func;
> +		insn->alt_group = alt_group;
>  
>  		/*
>  		 * Since alternative replacement code is copy/pasted by the
> @@ -2163,6 +2167,15 @@ static int validate_return(struct symbol
>  	return 0;
>  }
>  
> +static bool is_branch_to_alternative(struct instruction *from,
> +				     struct instruction *to)
> +{
> +	if (!from || !to->alt_group || !list_empty(&to->alts))
> +		return false;
> +
> +	return (from->alt_group != to->alt_group);
> +}
> +
>  /*
>   * Follow the branch starting at the given instruction, and recursively follow
>   * any other branches (jumps).  Meanwhile, track the frame pointer state at
> @@ -2170,6 +2183,7 @@ static int validate_return(struct symbol
>   * tools/objtool/Documentation/stack-validation.txt.
>   */
>  static int validate_branch(struct objtool_file *file, struct symbol *func,
> +			   struct instruction *from,
>  			   struct instruction *insn, struct insn_state state)
>  {
>  	struct alternative *alt;
> @@ -2180,7 +2194,7 @@ static int validate_branch(struct objtoo
>  
>  	sec = insn->sec;
>  
> -	if (insn->alt_group && list_empty(&insn->alts)) {
> +	if (is_branch_to_alternative(from, insn)) {
>  		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
>  			  sec, insn->offset);
>  		return 1;
> @@ -2227,7 +2241,7 @@ static int validate_branch(struct objtoo
>  				if (alt->skip_orig)
>  					skip_orig = true;
>  
> -				ret = validate_branch(file, func, alt->insn, state);
> +				ret = validate_branch(file, func, NULL, alt->insn, state);
>  				if (ret) {
>  					if (backtrace)
>  						BT_FUNC("(alt)", insn);
> @@ -2271,7 +2285,7 @@ static int validate_branch(struct objtoo
>  
>  			} else if (insn->jump_dest) {
>  				ret = validate_branch(file, func,
> -						      insn->jump_dest, state);
> +						      insn, insn->jump_dest, state);
>  				if (ret) {
>  					if (backtrace)
>  						BT_FUNC("(branch)", insn);
> @@ -2402,7 +2416,7 @@ static int validate_unwind_hints(struct
>  
>  	while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
>  		if (insn->hint && !insn->visited) {
> -			ret = validate_branch(file, insn->func, insn, state);
> +			ret = validate_branch(file, insn->func, NULL, insn, state);
>  			if (ret && backtrace)
>  				BT_FUNC("<=== (hint)", insn);
>  			warnings += ret;
> @@ -2543,7 +2557,7 @@ static int validate_symbol(struct objtoo
>  
>  	state->uaccess = sym->uaccess_safe;
>  
> -	ret = validate_branch(file, insn->func, insn, *state);
> +	ret = validate_branch(file, insn->func, NULL, insn, *state);
>  	if (ret && backtrace)
>  		BT_FUNC("<=== (sym)", insn);
>  	return ret;
> --- a/tools/objtool/check.h
> +++ b/tools/objtool/check.h
> @@ -30,12 +30,13 @@ struct instruction {
>  	unsigned int len;
>  	enum insn_type type;
>  	unsigned long immediate;
> -	bool alt_group, dead_end, ignore, ignore_alts;
> +	bool dead_end, ignore, ignore_alts;
>  	bool hint;
>  	bool retpoline_safe;
>  	s8 instr;
>  	u8 visited;
>  	u8 ret_offset;
> +	int alt_group;
>  	struct symbol *call_dest;
>  	struct instruction *jump_dest;
>  	struct instruction *first_jump_src;
> 
> 

-- 
Josh


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 02/14] objtool: Fix ORC vs alternatives
  2020-04-28 19:11 ` [PATCH v2 02/14] objtool: Fix ORC vs alternatives Peter Zijlstra
@ 2020-04-28 19:55   ` Josh Poimboeuf
  2020-04-29 14:33   ` Miroslav Benes
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2 siblings, 0 replies; 39+ messages in thread
From: Josh Poimboeuf @ 2020-04-28 19:55 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: alexandre.chartre, linux-kernel, jthierry, tglx, x86, mbenes, Jann Horn

On Tue, Apr 28, 2020 at 09:11:03PM +0200, Peter Zijlstra wrote:
> Jann reported that (for instance) entry_64.o:general_protection has
> very odd ORC data:
> 
>   0000000000000f40 <general_protection>:
>   #######sp:sp+8 bp:(und) type:iret end:0
>     f40:       90                      nop
>   #######sp:(und) bp:(und) type:call end:0
>     f41:       90                      nop
>     f42:       90                      nop
>   #######sp:sp+8 bp:(und) type:iret end:0
>     f43:       e8 a8 01 00 00          callq  10f0 <error_entry>
>   #######sp:sp+0 bp:(und) type:regs end:0
>     f48:       f6 84 24 88 00 00 00    testb  $0x3,0x88(%rsp)
>     f4f:       03
>     f50:       74 00                   je     f52 <general_protection+0x12>
>     f52:       48 89 e7                mov    %rsp,%rdi
>     f55:       48 8b 74 24 78          mov    0x78(%rsp),%rsi
>     f5a:       48 c7 44 24 78 ff ff    movq   $0xffffffffffffffff,0x78(%rsp)
>     f61:       ff ff
>     f63:       e8 00 00 00 00          callq  f68 <general_protection+0x28>
>     f68:       e9 73 02 00 00          jmpq   11e0 <error_exit>
>   #######sp:(und) bp:(und) type:call end:0
>     f6d:       0f 1f 00                nopl   (%rax)
> 
> Note the entry at 0xf41. Josh found this was the result of commit:
> 
>   764eef4b109a ("objtool: Rewrite alt->skip_orig")
> 
> Due to the early return in validate_branch() we no longer set
> insn->cfi of the original instruction stream (the NOPs at 0xf41 and
> 0xf42) and we'll end up with the above weirdness.
> 
> In other discussions we realized alternatives should be ORC invariant;
> that is, due to there being only a single ORC table, it must be valid
> for all alternatives. The easiest way to ensure this is to not allow
> any stack modifications in alternatives.
> 
> When we enforce this latter observation, we get the property that the
> whole alternative must have the same CFI, which we can employ to fix
> the former report.
> 
> Fixes: 764eef4b109a ("objtool: Rewrite alt->skip_orig")
> Reported-by: Jann Horn <jannh@google.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  tools/objtool/Documentation/stack-validation.txt |    7 ++++
>  tools/objtool/check.c                            |   34 ++++++++++++++++++++++-
>  2 files changed, 40 insertions(+), 1 deletion(-)
> 
> --- a/tools/objtool/Documentation/stack-validation.txt
> +++ b/tools/objtool/Documentation/stack-validation.txt
> @@ -315,6 +315,13 @@ they mean, and suggestions for how to fi
>        function tracing inserts additional calls, which is not obvious from the
>        sources).
>  
> +10. file.o: warning: func()+0x5c: alternative modifies stack
> +
> +    This means that an alternative includes instructions that modify the
> +    stack. The problem is that there is only one ORC unwind table, this means
> +    that the ORC unwind entries must be valid for each of the alternatives.
> +    The easiest way to enforce this is to ensure alternative do not contain

"alternatives"

Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>

-- 
Josh


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 00/14] objtool vs retpoline
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (13 preceding siblings ...)
  2020-04-28 19:11 ` [PATCH v2 14/14] x86/retpoline: Fix retpoline unwind Peter Zijlstra
@ 2020-04-28 20:17 ` Josh Poimboeuf
  2020-04-29 10:19 ` [PATCH v2.1 01-A/14] objtool: Remove check preventing branches within alternative Peter Zijlstra
                   ` (2 subsequent siblings)
  17 siblings, 0 replies; 39+ messages in thread
From: Josh Poimboeuf @ 2020-04-28 20:17 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: alexandre.chartre, linux-kernel, jthierry, tglx, x86, mbenes

On Tue, Apr 28, 2020 at 09:11:01PM +0200, Peter Zijlstra wrote:
> Hi,
> 
> Based on Alexandre's patches, here's a few that go on top of tip/objtool/core.
> 
> With these patches on objtool can completely understand retpolines and RSB
> stuffing, which means it can emit valid ORC unwind information for them, which
> in turn means we can now unwind through a retpoline.
> 
> New since last time:
> 
>  - 1-3, alternatives vs ORC unwind
>  - 7-9: implement some suggestions from Julien
>  - addressed feedback

For patches 2-14:

  Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>

Instead of patch 1, maybe you could grab Julien's patch to drop the
useless warning; and also grab the alt_group numbering and the
"orig->alt_group = true" bits from Alexandre's patch.

-- 
Josh


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives
  2020-04-28 19:11 ` [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives Peter Zijlstra
@ 2020-04-29  0:54   ` Brian Gerst
  2020-04-29  8:30     ` Peter Zijlstra
  0 siblings, 1 reply; 39+ messages in thread
From: Brian Gerst @ 2020-04-29  0:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Josh Poimboeuf, Alexandre Chartre, Linux Kernel Mailing List,
	jthierry, Thomas Gleixner, the arch/x86 maintainers,
	Miroslav Benes

On Tue, Apr 28, 2020 at 3:21 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> As reported by objtool:
>
>   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative modifies stack
>   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative modifies stack
>
> the smap_{save,restore}() alternatives violate (the newly enforced)
> rule on stack invariance. That is, due to there only being a single
> ORC table it must be valid to any alternative. These alternatives
> violate this with the direct result that unwinds will not be correct
> in between these calls.
>
> [ In specific, since we force SMAP on for objtool, running on !SMAP
> hardware will observe a different stack-layout and the ORC unwinder
> will stumble. ]
>
> So rewrite the functions to unconditionally save/restore the flags,
> which gives an invariant stack layout irrespective of the SMAP state.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/include/asm/smap.h |   11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/include/asm/smap.h
> +++ b/arch/x86/include/asm/smap.h
> @@ -57,16 +57,19 @@ static __always_inline unsigned long sma
>  {
>         unsigned long flags;
>
> -       asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
> -                                 X86_FEATURE_SMAP)
> -                     : "=rm" (flags) : : "memory", "cc");
> +       asm volatile ("# smap_save\n\t"
> +                     "pushf; pop %0"
> +                     : "=rm" (flags) : : "memory");
> +
> +       clac();
>
>         return flags;
>  }
>
>  static __always_inline void smap_restore(unsigned long flags)
>  {
> -       asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
> +       asm volatile ("# smap_restore\n\t"
> +                     "push %0; popf"
>                       : : "g" (flags) : "memory", "cc");
>  }

POPF is an expensive instruction that should be avoided if possible.
A better solution would be to have the alternative jump over the
push/pop when SMAP is disabled.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives
  2020-04-29  0:54   ` Brian Gerst
@ 2020-04-29  8:30     ` Peter Zijlstra
  2020-04-29 10:18       ` Peter Zijlstra
  0 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-29  8:30 UTC (permalink / raw)
  To: Brian Gerst
  Cc: Josh Poimboeuf, Alexandre Chartre, Linux Kernel Mailing List,
	jthierry, Thomas Gleixner, the arch/x86 maintainers,
	Miroslav Benes

On Tue, Apr 28, 2020 at 08:54:05PM -0400, Brian Gerst wrote:
> On Tue, Apr 28, 2020 at 3:21 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > As reported by objtool:
> >
> >   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative modifies stack
> >   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative modifies stack
> >
> > the smap_{save,restore}() alternatives violate (the newly enforced)
> > rule on stack invariance. That is, due to there only being a single
> > ORC table it must be valid to any alternative. These alternatives
> > violate this with the direct result that unwinds will not be correct
> > in between these calls.
> >
> > [ In specific, since we force SMAP on for objtool, running on !SMAP
> > hardware will observe a different stack-layout and the ORC unwinder
> > will stumble. ]
> >
> > So rewrite the functions to unconditionally save/restore the flags,
> > which gives an invariant stack layout irrespective of the SMAP state.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  arch/x86/include/asm/smap.h |   11 +++++++----
> >  1 file changed, 7 insertions(+), 4 deletions(-)
> >
> > --- a/arch/x86/include/asm/smap.h
> > +++ b/arch/x86/include/asm/smap.h
> > @@ -57,16 +57,19 @@ static __always_inline unsigned long sma
> >  {
> >         unsigned long flags;
> >
> > -       asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
> > -                                 X86_FEATURE_SMAP)
> > -                     : "=rm" (flags) : : "memory", "cc");
> > +       asm volatile ("# smap_save\n\t"
> > +                     "pushf; pop %0"
> > +                     : "=rm" (flags) : : "memory");
> > +
> > +       clac();
> >
> >         return flags;
> >  }
> >
> >  static __always_inline void smap_restore(unsigned long flags)
> >  {
> > -       asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
> > +       asm volatile ("# smap_restore\n\t"
> > +                     "push %0; popf"
> >                       : : "g" (flags) : "memory", "cc");
> >  }
> 
> POPF is an expensive instruction that should be avoided if possible.
> A better solution would be to have the alternative jump over the
> push/pop when SMAP is disabled.

Yeah. I think I had that, but then confused myself again. I don't think
it matters much if you look at where it's used though.

Still, let me try the jmp thing again..

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives
  2020-04-29  8:30     ` Peter Zijlstra
@ 2020-04-29 10:18       ` Peter Zijlstra
  2020-04-29 12:12         ` Brian Gerst
  2020-05-01 18:22         ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  0 siblings, 2 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-29 10:18 UTC (permalink / raw)
  To: Brian Gerst
  Cc: Josh Poimboeuf, Alexandre Chartre, Linux Kernel Mailing List,
	jthierry, Thomas Gleixner, the arch/x86 maintainers,
	Miroslav Benes

On Wed, Apr 29, 2020 at 10:30:53AM +0200, Peter Zijlstra wrote:
> > POPF is an expensive instruction that should be avoided if possible.
> > A better solution would be to have the alternative jump over the
> > push/pop when SMAP is disabled.
> 
> Yeah. I think I had that, but then confused myself again. I don't think
> it matters much if you look at where it's used though.
> 
> Still, let me try the jmp thing again..

Here goes..

---
Subject: x86,smap: Fix smap_{save,restore}() alternatives
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue Apr 28 19:57:59 CEST 2020

As reported by objtool:

  lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative modifies stack
  lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative modifies stack

the smap_{save,restore}() alternatives violate (the newly enforced)
rule on stack invariance. That is, due to there only being a single
ORC table it must be valid to any alternative. These alternatives
violate this with the direct result that unwinds will not be correct
when it hits between the PUSH and POP instructions.

Rewrite the functions to only have a conditional jump.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/smap.h |   11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -57,8 +57,10 @@ static __always_inline unsigned long sma
 {
 	unsigned long flags;
 
-	asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
-				  X86_FEATURE_SMAP)
+	asm volatile ("# smap_save\n\t"
+		      ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+		      "pushf; pop %0; " __ASM_CLAC "\n\t"
+		      "1:"
 		      : "=rm" (flags) : : "memory", "cc");
 
 	return flags;
@@ -66,7 +68,10 @@ static __always_inline unsigned long sma
 
 static __always_inline void smap_restore(unsigned long flags)
 {
-	asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+	asm volatile ("# smap_restore\n\t"
+		      ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+		      "push %0; popf\n\t"
+		      "1:"
 		      : : "g" (flags) : "memory", "cc");
 }
 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2.1 01-A/14] objtool: Remove check preventing branches within alternative
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (14 preceding siblings ...)
  2020-04-28 20:17 ` [PATCH v2 00/14] objtool vs retpoline Josh Poimboeuf
@ 2020-04-29 10:19 ` Peter Zijlstra
  2020-04-29 10:21 ` [PATCH v2.1 01-B/14] objtool: Uniquely identify alternative instruction groups Peter Zijlstra
  2020-04-29 16:46 ` [PATCH v2 00/14] objtool vs retpoline Miroslav Benes
  17 siblings, 0 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-29 10:19 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre; +Cc: linux-kernel, jthierry, tglx, x86, mbenes

Subject: objtool: Remove check preventing branches within alternative
From: Julien Thierry <jthierry@redhat.com>
Date: Fri, 27 Mar 2020 15:28:42 +0000

From: Julien Thierry <jthierry@redhat.com>

While jumping from outside an alternative region to the middle of an
alternative region is very likely wrong, jumping from an alternative
region into the same region is valid. It is a common pattern on arm64.

The first pattern is unlikely to happen in practice and checking only
for this adds a lot of complexity.

Just remove the current check.

Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Julien Thierry <jthierry@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200327152847.15294-6-jthierry@redhat.com
---
 tools/objtool/check.c |    6 ------
 1 file changed, 6 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2180,12 +2180,6 @@ static int validate_branch(struct objtoo
 
 	sec = insn->sec;
 
-	if (insn->alt_group && list_empty(&insn->alts)) {
-		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
-			  sec, insn->offset);
-		return 1;
-	}
-
 	while (1) {
 		next_insn = next_insn_same_sec(file, insn);
 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2.1 01-B/14] objtool: Uniquely identify alternative instruction groups
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (15 preceding siblings ...)
  2020-04-29 10:19 ` [PATCH v2.1 01-A/14] objtool: Remove check preventing branches within alternative Peter Zijlstra
@ 2020-04-29 10:21 ` Peter Zijlstra
  2020-04-29 16:46 ` [PATCH v2 00/14] objtool vs retpoline Miroslav Benes
  17 siblings, 0 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-29 10:21 UTC (permalink / raw)
  To: jpoimboe, alexandre.chartre; +Cc: linux-kernel, jthierry, tglx, x86, mbenes

Subject: objtool: Uniquely identify alternative instruction groups
From: Alexandre Chartre <alexandre.chartre@oracle.com>
Date: Tue, 14 Apr 2020 12:36:11 +0200

From: Alexandre Chartre <alexandre.chartre@oracle.com>

Assign a unique identifier to every alternative instruction group in
order to be able to tell which instructions belong to what
alternative.

[peterz: extracted from a larger patch]
Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c |   26 ++++++++++++++++++++------
 tools/objtool/check.h |    3 ++-
 2 files changed, 22 insertions(+), 7 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -770,7 +770,9 @@ static int handle_group_alt(struct objto
 			    struct instruction *orig_insn,
 			    struct instruction **new_insn)
 {
+	static unsigned int alt_group_next_index = 1;
 	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+	unsigned int alt_group = alt_group_next_index++;
 	unsigned long dest_off;
 
 	last_orig_insn = NULL;
@@ -779,7 +781,7 @@ static int handle_group_alt(struct objto
 		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
 			break;
 
-		insn->alt_group = true;
+		insn->alt_group = alt_group;
 		last_orig_insn = insn;
 	}
 
@@ -813,6 +815,7 @@ static int handle_group_alt(struct objto
 	}
 
 	last_new_insn = NULL;
+	alt_group = alt_group_next_index++;
 	insn = *new_insn;
 	sec_for_each_insn_from(file, insn) {
 		if (insn->offset >= special_alt->new_off + special_alt->new_len)
@@ -822,6 +825,7 @@ static int handle_group_alt(struct objto
 
 		insn->ignore = orig_insn->ignore_alts;
 		insn->func = orig_insn->func;
+		insn->alt_group = alt_group;
 
 		/*
 		 * Since alternative replacement code is copy/pasted by the
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -30,12 +30,13 @@ struct instruction {
 	unsigned int len;
 	enum insn_type type;
 	unsigned long immediate;
-	bool alt_group, dead_end, ignore, ignore_alts;
+	bool dead_end, ignore, ignore_alts;
 	bool hint;
 	bool retpoline_safe;
 	s8 instr;
 	u8 visited;
 	u8 ret_offset;
+	int alt_group;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives
  2020-04-29 10:18       ` Peter Zijlstra
@ 2020-04-29 12:12         ` Brian Gerst
  2020-05-01 18:22         ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 39+ messages in thread
From: Brian Gerst @ 2020-04-29 12:12 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Josh Poimboeuf, Alexandre Chartre, Linux Kernel Mailing List,
	jthierry, Thomas Gleixner, the arch/x86 maintainers,
	Miroslav Benes

On Wed, Apr 29, 2020 at 6:18 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Apr 29, 2020 at 10:30:53AM +0200, Peter Zijlstra wrote:
> > > POPF is an expensive instruction that should be avoided if possible.
> > > A better solution would be to have the alternative jump over the
> > > push/pop when SMAP is disabled.
> >
> > Yeah. I think I had that, but then confused myself again. I don't think
> > it matters much if you look at where it's used though.
> >
> > Still, let me try the jmp thing again..
>
> Here goes..
>
> ---
> Subject: x86,smap: Fix smap_{save,restore}() alternatives
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Tue Apr 28 19:57:59 CEST 2020
>
> As reported by objtool:
>
>   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative modifies stack
>   lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative modifies stack
>
> the smap_{save,restore}() alternatives violate (the newly enforced)
> rule on stack invariance. That is, due to there only being a single
> ORC table it must be valid to any alternative. These alternatives
> violate this with the direct result that unwinds will not be correct
> when it hits between the PUSH and POP instructions.
>
> Rewrite the functions to only have a conditional jump.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/include/asm/smap.h |   11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> --- a/arch/x86/include/asm/smap.h
> +++ b/arch/x86/include/asm/smap.h
> @@ -57,8 +57,10 @@ static __always_inline unsigned long sma
>  {
>         unsigned long flags;
>
> -       asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
> -                                 X86_FEATURE_SMAP)
> +       asm volatile ("# smap_save\n\t"
> +                     ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
> +                     "pushf; pop %0; " __ASM_CLAC "\n\t"
> +                     "1:"
>                       : "=rm" (flags) : : "memory", "cc");
>
>         return flags;
> @@ -66,7 +68,10 @@ static __always_inline unsigned long sma
>
>  static __always_inline void smap_restore(unsigned long flags)
>  {
> -       asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
> +       asm volatile ("# smap_restore\n\t"
> +                     ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
> +                     "push %0; popf\n\t"
> +                     "1:"
>                       : : "g" (flags) : "memory", "cc");
>  }
>

Looks good.  Alternatively, you could use static_cpu_has(X86_FEATURE_SMAP).

--
Brian Gerst

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 02/14] objtool: Fix ORC vs alternatives
  2020-04-28 19:11 ` [PATCH v2 02/14] objtool: Fix ORC vs alternatives Peter Zijlstra
  2020-04-28 19:55   ` Josh Poimboeuf
@ 2020-04-29 14:33   ` Miroslav Benes
  2020-04-29 15:51     ` Peter Zijlstra
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2 siblings, 1 reply; 39+ messages in thread
From: Miroslav Benes @ 2020-04-29 14:33 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: jpoimboe, alexandre.chartre, linux-kernel, jthierry, tglx, x86,
	Jann Horn

On Tue, 28 Apr 2020, Peter Zijlstra wrote:

> Jann reported that (for instance) entry_64.o:general_protection has
> very odd ORC data:
> 
>   0000000000000f40 <general_protection>:
>   #######sp:sp+8 bp:(und) type:iret end:0
>     f40:       90                      nop
>   #######sp:(und) bp:(und) type:call end:0
>     f41:       90                      nop
>     f42:       90                      nop
>   #######sp:sp+8 bp:(und) type:iret end:0
>     f43:       e8 a8 01 00 00          callq  10f0 <error_entry>
>   #######sp:sp+0 bp:(und) type:regs end:0
>     f48:       f6 84 24 88 00 00 00    testb  $0x3,0x88(%rsp)
>     f4f:       03
>     f50:       74 00                   je     f52 <general_protection+0x12>
>     f52:       48 89 e7                mov    %rsp,%rdi
>     f55:       48 8b 74 24 78          mov    0x78(%rsp),%rsi
>     f5a:       48 c7 44 24 78 ff ff    movq   $0xffffffffffffffff,0x78(%rsp)
>     f61:       ff ff
>     f63:       e8 00 00 00 00          callq  f68 <general_protection+0x28>
>     f68:       e9 73 02 00 00          jmpq   11e0 <error_exit>
>   #######sp:(und) bp:(und) type:call end:0
>     f6d:       0f 1f 00                nopl   (%rax)
> 
> Note the entry at 0xf41. Josh found this was the result of commit:
> 
>   764eef4b109a ("objtool: Rewrite alt->skip_orig")
> 
> Due to the early return in validate_branch() we no longer set
> insn->cfi of the original instruction stream (the NOPs at 0xf41 and
> 0xf42) and we'll end up with the above weirdness.
> 
> In other discussions we realized alternatives should be ORC invariant;
> that is, due to there being only a single ORC table, it must be valid
> for all alternatives. The easiest way to ensure this is to not allow
> any stack modifications in alternatives.
> 
> When we enforce this latter observation, we get the property that the
> whole alternative must have the same CFI, which we can employ to fix
> the former report.
> 
> Fixes: 764eef4b109a ("objtool: Rewrite alt->skip_orig")
> Reported-by: Jann Horn <jannh@google.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  tools/objtool/Documentation/stack-validation.txt |    7 ++++
>  tools/objtool/check.c                            |   34 ++++++++++++++++++++++-
>  2 files changed, 40 insertions(+), 1 deletion(-)
> 
> --- a/tools/objtool/Documentation/stack-validation.txt
> +++ b/tools/objtool/Documentation/stack-validation.txt
> @@ -315,6 +315,13 @@ they mean, and suggestions for how to fi
>        function tracing inserts additional calls, which is not obvious from the
>        sources).
>  
> +10. file.o: warning: func()+0x5c: alternative modifies stack
> +
> +    This means that an alternative includes instructions that modify the
> +    stack. The problem is that there is only one ORC unwind table, this means
> +    that the ORC unwind entries must be valid for each of the alternatives.
> +    The easiest way to enforce this is to ensure alternative do not contain
> +    any ORC entries, which in turn implies the above constraint.
>  
>  If the error doesn't seem to make sense, it could be a bug in objtool.
>  Feel free to ask the objtool maintainer for help.
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -2001,6 +2001,11 @@ static int handle_insn_ops(struct instru
>  	list_for_each_entry(op, &insn->stack_ops, list) {
>  		int res;
>  
> +		if (insn->alt_group) {
> +			WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
> +			return -1;
> +		}
> +
>  		res = update_cfi_state(insn, &state->cfi, op);
>  		if (res)
>  			return res;
> @@ -2177,6 +2182,30 @@ static bool is_branch_to_alternative(str
>  }
>  
>  /*
> + * Alternatives should not contain any ORC entries, this in turn means they
> + * should not contain any CFI ops, which implies all instructions should have
> + * the same same CFI state.
> + *
> + * It is possible to constuct alternatives that have unreachable holes that go
> + * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
> + * states which then results in ORC entries, which we just said we didn't want.
> + *
> + * Avoid them by copying the CFI entry of the first instruction into the whole
> + * alternative.
> + */
> +static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
> +{
> +	struct instruction *first_insn = insn;
> +	int alt_group = insn->alt_group;
> +
> +	sec_for_each_insn_continue(file, insn) {
> +		if (insn->alt_group != alt_group)
> +			break;
> +		insn->cfi = first_insn->cfi;
> +	}
> +}

If I am reading this and previous patch correctly...

The function would copy cfi only to "orig" alternative (its insn->alts is 
non-empty, orig_insn->alt_group differs from new_insn->alt_group), right? 
Would it make sense to do the same for "new" alternative, because of the 
invariant? It seems to me it is not processed anywhere that way.

Am I missing something? Whenever I try to read all this alternatives 
handling in objtool, I get lost pretty soon.

> +
> +/*
>   * Follow the branch starting at the given instruction, and recursively follow
>   * any other branches (jumps).  Meanwhile, track the frame pointer state at
>   * each instruction and validate all the rules described in
> @@ -2234,7 +2263,7 @@ static int validate_branch(struct objtoo
>  
>  		insn->visited |= visited;
>  
> -		if (!insn->ignore_alts) {
> +		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
>  			bool skip_orig = false;
>  
>  			list_for_each_entry(alt, &insn->alts, list) {
> @@ -2249,6 +2278,9 @@ static int validate_branch(struct objtoo
>  				}
>  			}
>  
> +			if (insn->alt_group)
> +				fill_alternative_cfi(file, insn);
> +

fill_alternative_cfi() is called here only for orig_insn, isn't it?

>  			if (skip_orig)
>  				return 0;
>  		}

Miroslav

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 02/14] objtool: Fix ORC vs alternatives
  2020-04-29 14:33   ` Miroslav Benes
@ 2020-04-29 15:51     ` Peter Zijlstra
  2020-04-29 16:41       ` Miroslav Benes
  0 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2020-04-29 15:51 UTC (permalink / raw)
  To: Miroslav Benes
  Cc: jpoimboe, alexandre.chartre, linux-kernel, jthierry, tglx, x86,
	Jann Horn

On Wed, Apr 29, 2020 at 04:33:31PM +0200, Miroslav Benes wrote:
> On Tue, 28 Apr 2020, Peter Zijlstra wrote:
> >  /*
> > + * Alternatives should not contain any ORC entries, this in turn means they
> > + * should not contain any CFI ops, which implies all instructions should have
> > + * the same same CFI state.
> > + *
> > + * It is possible to constuct alternatives that have unreachable holes that go
> > + * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
> > + * states which then results in ORC entries, which we just said we didn't want.
> > + *
> > + * Avoid them by copying the CFI entry of the first instruction into the whole
> > + * alternative.
> > + */
> > +static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
> > +{
> > +	struct instruction *first_insn = insn;
> > +	int alt_group = insn->alt_group;
> > +
> > +	sec_for_each_insn_continue(file, insn) {
> > +		if (insn->alt_group != alt_group)
> > +			break;
> > +		insn->cfi = first_insn->cfi;
> > +	}
> > +}
> 
> If I am reading this and previous patch correctly...
> 
> The function would copy cfi only to "orig" alternative (its insn->alts is 
> non-empty, orig_insn->alt_group differs from new_insn->alt_group), right? 

Yes.

> Would it make sense to do the same for "new" alternative, because of the 
> invariant? It seems to me it is not processed anywhere that way.

No.

> Am I missing something? Whenever I try to read all this alternatives 
> handling in objtool, I get lost pretty soon.

We only care about the ORC covering the original range, because that is
the range we execute code from. The memory where we store the
alternative instructions (.altinstruction section) is never executed,
that is, RIP should never point there, so we don't need ORC data covering
it.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 02/14] objtool: Fix ORC vs alternatives
  2020-04-29 15:51     ` Peter Zijlstra
@ 2020-04-29 16:41       ` Miroslav Benes
  0 siblings, 0 replies; 39+ messages in thread
From: Miroslav Benes @ 2020-04-29 16:41 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: jpoimboe, alexandre.chartre, linux-kernel, jthierry, tglx, x86,
	Jann Horn

On Wed, 29 Apr 2020, Peter Zijlstra wrote:

> On Wed, Apr 29, 2020 at 04:33:31PM +0200, Miroslav Benes wrote:
> > On Tue, 28 Apr 2020, Peter Zijlstra wrote:
> > >  /*
> > > + * Alternatives should not contain any ORC entries, this in turn means they
> > > + * should not contain any CFI ops, which implies all instructions should have
> > > + * the same same CFI state.
> > > + *
> > > + * It is possible to constuct alternatives that have unreachable holes that go
> > > + * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
> > > + * states which then results in ORC entries, which we just said we didn't want.
> > > + *
> > > + * Avoid them by copying the CFI entry of the first instruction into the whole
> > > + * alternative.
> > > + */
> > > +static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
> > > +{
> > > +	struct instruction *first_insn = insn;
> > > +	int alt_group = insn->alt_group;
> > > +
> > > +	sec_for_each_insn_continue(file, insn) {
> > > +		if (insn->alt_group != alt_group)
> > > +			break;
> > > +		insn->cfi = first_insn->cfi;
> > > +	}
> > > +}
> > 
> > If I am reading this and previous patch correctly...
> > 
> > The function would copy cfi only to "orig" alternative (its insn->alts is 
> > non-empty, orig_insn->alt_group differs from new_insn->alt_group), right? 
> 
> Yes.
> 
> > Would it make sense to do the same for "new" alternative, because of the 
> > invariant? It seems to me it is not processed anywhere that way.
> 
> No.
> 
> > Am I missing something? Whenever I try to read all this alternatives 
> > handling in objtool, I get lost pretty soon.
> 
> We only care about the ORC covering the original range, because that is
> the range we execute code from. The memory where we store the
> alternative instructions (.altinstruction section) is never executed,
> that is, RIP should never point there, so we don't need ORC data covering
> it.

Aha, that's what I didn't realize (again). Note to myself (for the 
hundredth time): alternatives are not branches.

Thanks
Miroslav

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 00/14] objtool vs retpoline
  2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
                   ` (16 preceding siblings ...)
  2020-04-29 10:21 ` [PATCH v2.1 01-B/14] objtool: Uniquely identify alternative instruction groups Peter Zijlstra
@ 2020-04-29 16:46 ` Miroslav Benes
  17 siblings, 0 replies; 39+ messages in thread
From: Miroslav Benes @ 2020-04-29 16:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: jpoimboe, alexandre.chartre, linux-kernel, jthierry, tglx, x86

On Tue, 28 Apr 2020, Peter Zijlstra wrote:

> Hi,
> 
> Based on Alexandre's patches, here's a few that go on top of tip/objtool/core.
> 
> With these patches on objtool can completely understand retpolines and RSB
> stuffing, which means it can emit valid ORC unwind information for them, which
> in turn means we can now unwind through a retpoline.
> 
> New since last time:
> 
>  - 1-3, alternatives vs ORC unwind
>  - 7-9: implement some suggestions from Julien
>  - addressed feedback

You can add my

Reviewed-by: Miroslav Benes <mbenes@suse.cz>

to patches 1A, 1B and 2-10 (objtool patches and updated smap fix). The 
other four patches should be fine too, but I am not well versed in the 
speculation stuff.

Miroslav

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] x86/retpoline: Fix retpoline unwind
  2020-04-28 19:11 ` [PATCH v2 14/14] x86/retpoline: Fix retpoline unwind Peter Zijlstra
@ 2020-05-01 18:22   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     cc1ac9c792810b93783a7de344f428922af8d98c
Gitweb:        https://git.kernel.org/tip/cc1ac9c792810b93783a7de344f428922af8d98c
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 16 Apr 2020 14:34:26 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:34 +02:00

x86/retpoline: Fix retpoline unwind

Currently objtool cannot understand retpolines, and thus cannot
generate ORC unwind information for them. This means that we cannot
unwind from the middle of a retpoline.

The recent ANNOTATE_INTRA_FUNCTION_CALL and UNWIND_HINT_RET_OFFSET
support in objtool enables it to understand the basic retpoline
construct. A further problem is that the ORC unwind information is
alternative invariant; IOW. every alternative should have the same
ORC, retpolines obviously violate this. This means we need to
out-of-line them.

Since all GCC generated code already uses out-of-line retpolines, this
should not affect performance much, if anything.

This will enable objtool to generate valid ORC data for the
out-of-line copies, which means we can correctly and reliably unwind
through a retpoline.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191700.210835357@infradead.org
---
 arch/x86/include/asm/asm-prototypes.h |  7 +++-
 arch/x86/include/asm/nospec-branch.h  | 56 +++-----------------------
 arch/x86/lib/retpoline.S              | 26 ++++++++++--
 3 files changed, 38 insertions(+), 51 deletions(-)

diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index aa7585e..9bf2620 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -21,8 +21,15 @@ extern void cmpxchg8b_emu(void);
 #define DECL_INDIRECT_THUNK(reg) \
 	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
 
+#define DECL_RETPOLINE(reg) \
+	extern asmlinkage void __x86_retpoline_ ## reg (void);
+
 #undef GEN
 #define GEN(reg) DECL_INDIRECT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
+#undef GEN
+#define GEN(reg) DECL_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>
+
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index d3269b6..d52d1aa 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -13,15 +13,6 @@
 #include <asm/unwind_hints.h>
 
 /*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
- */
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
-	ANNOTATE_IGNORE_ALTERNATIVE
-
-/*
  * Fill the CPU return stack buffer.
  *
  * Each entry in the RSB, if used for a speculative 'ret', contains an
@@ -83,44 +74,15 @@
 .endm
 
 /*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
- */
-.macro RETPOLINE_JMP reg:req
-	call	.Ldo_rop_\@
-.Lspec_trap_\@:
-	pause
-	lfence
-	jmp	.Lspec_trap_\@
-.Ldo_rop_\@:
-	mov	\reg, (%_ASM_SP)
-	ret
-.endm
-
-/*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
- */
-.macro RETPOLINE_CALL reg:req
-	jmp	.Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
-	RETPOLINE_JMP \reg
-.Ldo_call_\@:
-	call	.Ldo_retpoline_jmp_\@
-.endm
-
-/*
  * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
  * indirect jmp/call which may be susceptible to the Spectre variant 2
  * attack.
  */
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg),	\
-		__stringify(RETPOLINE_JMP %\reg), X86_FEATURE_RETPOLINE,\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+		      __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	jmp	*%\reg
 #endif
@@ -128,10 +90,9 @@
 
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg),\
-		__stringify(RETPOLINE_CALL %\reg), X86_FEATURE_RETPOLINE,\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+		      __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	call	*%\reg
 #endif
@@ -165,16 +126,16 @@
  * which is ensured when CONFIG_RETPOLINE is defined.
  */
 # define CALL_NOSPEC						\
-	ANNOTATE_NOSPEC_ALTERNATIVE				\
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
+	"call __x86_retpoline_%V[thunk_target]\n",		\
 	X86_FEATURE_RETPOLINE,					\
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
 	X86_FEATURE_RETPOLINE_AMD)
+
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
 #else /* CONFIG_X86_32 */
@@ -184,7 +145,6 @@
  * here, anyway.
  */
 # define CALL_NOSPEC						\
-	ANNOTATE_NOSPEC_ALTERNATIVE				\
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 9cc5480..b4c43a9 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,15 +7,31 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
+#include <asm/frame.h>
 
 .macro THUNK reg
 	.section .text.__x86.indirect_thunk
 
+	.align 32
 SYM_FUNC_START(__x86_indirect_thunk_\reg)
-	CFI_STARTPROC
-	JMP_NOSPEC %\reg
-	CFI_ENDPROC
+	JMP_NOSPEC \reg
 SYM_FUNC_END(__x86_indirect_thunk_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call	.Ldo_rop_\@
+.Lspec_trap_\@:
+	UNWIND_HINT_EMPTY
+	pause
+	lfence
+	jmp	.Lspec_trap_\@
+.Ldo_rop_\@:
+	mov	%\reg, (%_ASM_SP)
+	UNWIND_HINT_RET_OFFSET
+	ret
+SYM_FUNC_END(__x86_retpoline_\reg)
+
 .endm
 
 /*
@@ -32,6 +48,7 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 
 #define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
 #define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define EXPORT_RETPOLINE(reg)  __EXPORT_THUNK(__x86_retpoline_ ## reg)
 
 #undef GEN
 #define GEN(reg) THUNK reg
@@ -41,3 +58,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 #define GEN(reg) EXPORT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
+#undef GEN
+#define GEN(reg) EXPORT_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] x86: Simplify retpoline declaration
  2020-04-28 19:11 ` [PATCH v2 12/14] x86: Simplify retpoline declaration Peter Zijlstra
@ 2020-05-01 18:22   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     ca3f0d80dd57c8828bfb5bc0bc79750ea7a1ba26
Gitweb:        https://git.kernel.org/tip/ca3f0d80dd57c8828bfb5bc0bc79750ea7a1ba26
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 22 Apr 2020 17:03:22 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:34 +02:00

x86: Simplify retpoline declaration

Because of how KSYM works, we need one declaration per line. Seeing
how we're going to be doubling the amount of retpoline symbols,
simplify the machinery in order to avoid having to copy/paste even
more.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191700.091696925@infradead.org
---
 arch/x86/include/asm/GEN-for-each-reg.h | 25 ++++++++++++++++-
 arch/x86/include/asm/asm-prototypes.h   | 28 +++++-------------
 arch/x86/lib/retpoline.S                | 37 ++++++++++--------------
 3 files changed, 49 insertions(+), 41 deletions(-)
 create mode 100644 arch/x86/include/asm/GEN-for-each-reg.h

diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h
new file mode 100644
index 0000000..1b07fb1
--- /dev/null
+++ b/arch/x86/include/asm/GEN-for-each-reg.h
@@ -0,0 +1,25 @@
+#ifdef CONFIG_64BIT
+GEN(rax)
+GEN(rbx)
+GEN(rcx)
+GEN(rdx)
+GEN(rsi)
+GEN(rdi)
+GEN(rbp)
+GEN(r8)
+GEN(r9)
+GEN(r10)
+GEN(r11)
+GEN(r12)
+GEN(r13)
+GEN(r14)
+GEN(r15)
+#else
+GEN(eax)
+GEN(ebx)
+GEN(ecx)
+GEN(edx)
+GEN(esi)
+GEN(edi)
+GEN(ebp)
+#endif
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index ce92c4a..aa7585e 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -17,24 +17,12 @@ extern void cmpxchg8b_emu(void);
 #endif
 
 #ifdef CONFIG_RETPOLINE
-#ifdef CONFIG_X86_32
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
-#else
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
-INDIRECT_THUNK(8)
-INDIRECT_THUNK(9)
-INDIRECT_THUNK(10)
-INDIRECT_THUNK(11)
-INDIRECT_THUNK(12)
-INDIRECT_THUNK(13)
-INDIRECT_THUNK(14)
-INDIRECT_THUNK(15)
-#endif
-INDIRECT_THUNK(ax)
-INDIRECT_THUNK(bx)
-INDIRECT_THUNK(cx)
-INDIRECT_THUNK(dx)
-INDIRECT_THUNK(si)
-INDIRECT_THUNK(di)
-INDIRECT_THUNK(bp)
+
+#define DECL_INDIRECT_THUNK(reg) \
+	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+
+#undef GEN
+#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 363ec13..9cc5480 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -24,25 +24,20 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
  * only see one instance of "__x86_indirect_thunk_\reg" rather
  * than one per register with the correct names. So we do it
  * the simple and nasty way...
+ *
+ * Worse, you can only have a single EXPORT_SYMBOL per line,
+ * and CPP can't insert newlines, so we have to repeat everything
+ * at least twice.
  */
-#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
-#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
-
-GENERATE_THUNK(_ASM_AX)
-GENERATE_THUNK(_ASM_BX)
-GENERATE_THUNK(_ASM_CX)
-GENERATE_THUNK(_ASM_DX)
-GENERATE_THUNK(_ASM_SI)
-GENERATE_THUNK(_ASM_DI)
-GENERATE_THUNK(_ASM_BP)
-#ifdef CONFIG_64BIT
-GENERATE_THUNK(r8)
-GENERATE_THUNK(r9)
-GENERATE_THUNK(r10)
-GENERATE_THUNK(r11)
-GENERATE_THUNK(r12)
-GENERATE_THUNK(r13)
-GENERATE_THUNK(r14)
-GENERATE_THUNK(r15)
-#endif
+
+#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+
+#undef GEN
+#define GEN(reg) THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) EXPORT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] x86/speculation: Change FILL_RETURN_BUFFER to work with objtool
  2020-04-28 19:11 ` [PATCH v2 11/14] x86/speculation: Change FILL_RETURN_BUFFER to work with objtool Peter Zijlstra
@ 2020-05-01 18:22   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel), Alexandre Chartre, Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     089dd8e53126ebaf506e2dc0bf89d652c36bfc12
Gitweb:        https://git.kernel.org/tip/089dd8e53126ebaf506e2dc0bf89d652c36bfc12
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Tue, 14 Apr 2020 12:36:16 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:34 +02:00

x86/speculation: Change FILL_RETURN_BUFFER to work with objtool

Change FILL_RETURN_BUFFER so that objtool groks it and can generate
correct ORC unwind information.

 - Since ORC is alternative invariant; that is, all alternatives
   should have the same ORC entries, the __FILL_RETURN_BUFFER body
   can not be part of an alternative.

   Therefore, move it out of the alternative and keep the alternative
   as a sort of jump_label around it.

 - Use the ANNOTATE_INTRA_FUNCTION_CALL annotation to white-list
   these 'funny' call instructions to nowhere.

 - Use UNWIND_HINT_EMPTY to 'fill' the speculation traps, otherwise
   objtool will consider them unreachable.

 - Move the RSP adjustment into the loop, such that the loop has a
   deterministic stack layout.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191700.032079304@infradead.org
---
 arch/x86/include/asm/nospec-branch.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 7e9a281..b8890e1 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -4,11 +4,13 @@
 #define _ASM_X86_NOSPEC_BRANCH_H_
 
 #include <linux/static_key.h>
+#include <linux/frame.h>
 
 #include <asm/alternative.h>
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
 
 /*
  * This should be used immediately before a retpoline alternative. It tells
@@ -46,21 +48,25 @@
 #define __FILL_RETURN_BUFFER(reg, nr, sp)	\
 	mov	$(nr/2), reg;			\
 771:						\
+	ANNOTATE_INTRA_FUNCTION_CALL;		\
 	call	772f;				\
 773:	/* speculation trap */			\
+	UNWIND_HINT_EMPTY;			\
 	pause;					\
 	lfence;					\
 	jmp	773b;				\
 772:						\
+	ANNOTATE_INTRA_FUNCTION_CALL;		\
 	call	774f;				\
 775:	/* speculation trap */			\
+	UNWIND_HINT_EMPTY;			\
 	pause;					\
 	lfence;					\
 	jmp	775b;				\
 774:						\
+	add	$(BITS_PER_LONG/8) * 2, sp;	\
 	dec	reg;				\
-	jnz	771b;				\
-	add	$(BITS_PER_LONG/8) * nr, sp;
+	jnz	771b;
 
 #ifdef __ASSEMBLY__
 
@@ -137,10 +143,8 @@
   */
 .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
-		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
-		\ftr
+	ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+	__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
 .Lskip_rsb_\@:
 #endif
 .endm

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] x86: Change {JMP,CALL}_NOSPEC argument
  2020-04-28 19:11 ` [PATCH v2 13/14] x86: Change {JMP,CALL}_NOSPEC argument Peter Zijlstra
@ 2020-05-01 18:22   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     34fdce6981b96920ced4e0ee56e9db3fb03a33f0
Gitweb:        https://git.kernel.org/tip/34fdce6981b96920ced4e0ee56e9db3fb03a33f0
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 22 Apr 2020 17:16:40 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:34 +02:00

x86: Change {JMP,CALL}_NOSPEC argument

In order to change the {JMP,CALL}_NOSPEC macros to call out-of-line
versions of the retpoline magic, we need to remove the '%' from the
argument, such that we can paste it onto symbol names.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191700.151623523@infradead.org
---
 arch/x86/crypto/aesni-intel_asm.S            |  4 +--
 arch/x86/crypto/camellia-aesni-avx-asm_64.S  |  2 +-
 arch/x86/crypto/camellia-aesni-avx2-asm_64.S |  2 +-
 arch/x86/crypto/crc32c-pcl-intel-asm_64.S    | 26 +++++++++----------
 arch/x86/entry/entry_32.S                    |  6 ++--
 arch/x86/entry/entry_64.S                    |  2 +-
 arch/x86/include/asm/nospec-branch.h         | 16 ++++++------
 arch/x86/kernel/ftrace_32.S                  |  2 +-
 arch/x86/kernel/ftrace_64.S                  |  4 +--
 arch/x86/lib/checksum_32.S                   |  4 +--
 arch/x86/platform/efi/efi_stub_64.S          |  2 +-
 11 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index cad6e1b..54e7d15 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2758,7 +2758,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
 	pxor INC, STATE4
 	movdqu IV, 0x30(OUTP)
 
-	CALL_NOSPEC %r11
+	CALL_NOSPEC r11
 
 	movdqu 0x00(OUTP), INC
 	pxor INC, STATE1
@@ -2803,7 +2803,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
 	_aesni_gf128mul_x_ble()
 	movups IV, (IVP)
 
-	CALL_NOSPEC %r11
+	CALL_NOSPEC r11
 
 	movdqu 0x40(OUTP), INC
 	pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index d01ddd7..ecc0a9a 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -1228,7 +1228,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
 	vpxor 14 * 16(%rax), %xmm15, %xmm14;
 	vpxor 15 * 16(%rax), %xmm15, %xmm15;
 
-	CALL_NOSPEC %r9;
+	CALL_NOSPEC r9;
 
 	addq $(16 * 16), %rsp;
 
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 563ef6e..0907243 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -1339,7 +1339,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
 	vpxor 14 * 32(%rax), %ymm15, %ymm14;
 	vpxor 15 * 32(%rax), %ymm15, %ymm15;
 
-	CALL_NOSPEC %r9;
+	CALL_NOSPEC r9;
 
 	addq $(16 * 32), %rsp;
 
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 0e6690e..8501ec4 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -75,7 +75,7 @@
 
 .text
 SYM_FUNC_START(crc_pcl)
-#define    bufp		%rdi
+#define    bufp		rdi
 #define    bufp_dw	%edi
 #define    bufp_w	%di
 #define    bufp_b	%dil
@@ -105,9 +105,9 @@ SYM_FUNC_START(crc_pcl)
 	## 1) ALIGN:
 	################################################################
 
-	mov     bufp, bufptmp		# rdi = *buf
-	neg     bufp
-	and     $7, bufp		# calculate the unalignment amount of
+	mov     %bufp, bufptmp		# rdi = *buf
+	neg     %bufp
+	and     $7, %bufp		# calculate the unalignment amount of
 					# the address
 	je      proc_block		# Skip if aligned
 
@@ -123,13 +123,13 @@ SYM_FUNC_START(crc_pcl)
 do_align:
 	#### Calculate CRC of unaligned bytes of the buffer (if any)
 	movq    (bufptmp), tmp		# load a quadward from the buffer
-	add     bufp, bufptmp		# align buffer pointer for quadword
+	add     %bufp, bufptmp		# align buffer pointer for quadword
 					# processing
-	sub     bufp, len		# update buffer length
+	sub     %bufp, len		# update buffer length
 align_loop:
 	crc32b  %bl, crc_init_dw 	# compute crc32 of 1-byte
 	shr     $8, tmp			# get next byte
-	dec     bufp
+	dec     %bufp
 	jne     align_loop
 
 proc_block:
@@ -169,10 +169,10 @@ continue_block:
 	xor     crc2, crc2
 
 	## branch into array
-	lea	jump_table(%rip), bufp
-	movzxw  (bufp, %rax, 2), len
-	lea	crc_array(%rip), bufp
-	lea     (bufp, len, 1), bufp
+	lea	jump_table(%rip), %bufp
+	movzxw  (%bufp, %rax, 2), len
+	lea	crc_array(%rip), %bufp
+	lea     (%bufp, len, 1), %bufp
 	JMP_NOSPEC bufp
 
 	################################################################
@@ -218,9 +218,9 @@ LABEL crc_ %i
 	## 4) Combine three results:
 	################################################################
 
-	lea	(K_table-8)(%rip), bufp		# first entry is for idx 1
+	lea	(K_table-8)(%rip), %bufp		# first entry is for idx 1
 	shlq    $3, %rax			# rax *= 8
-	pmovzxdq (bufp,%rax), %xmm0		# 2 consts: K1:K2
+	pmovzxdq (%bufp,%rax), %xmm0		# 2 consts: K1:K2
 	leal	(%eax,%eax,2), %eax		# rax *= 3 (total *24)
 	subq    %rax, tmp			# tmp -= rax*24
 
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index b67bae7..7e7ffb7 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -816,7 +816,7 @@ SYM_CODE_START(ret_from_fork)
 
 	/* kernel thread */
 1:	movl	%edi, %eax
-	CALL_NOSPEC %ebx
+	CALL_NOSPEC ebx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
@@ -1501,7 +1501,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
 
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
-	CALL_NOSPEC %edi
+	CALL_NOSPEC edi
 	jmp	ret_from_exception
 SYM_CODE_END(common_exception_read_cr2)
 
@@ -1522,7 +1522,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception)
 
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
-	CALL_NOSPEC %edi
+	CALL_NOSPEC edi
 	jmp	ret_from_exception
 SYM_CODE_END(common_exception)
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0e9504f..168b798 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -349,7 +349,7 @@ SYM_CODE_START(ret_from_fork)
 	/* kernel thread */
 	UNWIND_HINT_EMPTY
 	movq	%r12, %rdi
-	CALL_NOSPEC %rbx
+	CALL_NOSPEC rbx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index b8890e1..d3269b6 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -118,22 +118,22 @@
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg),	\
-		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE,	\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg),	\
+		__stringify(RETPOLINE_JMP %\reg), X86_FEATURE_RETPOLINE,\
+		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
-	jmp	*\reg
+	jmp	*%\reg
 #endif
 .endm
 
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg),	\
-		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg),\
+		__stringify(RETPOLINE_CALL %\reg), X86_FEATURE_RETPOLINE,\
+		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
-	call	*\reg
+	call	*%\reg
 #endif
 .endm
 
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index e8a9f83..e405fe1 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -189,5 +189,5 @@ return_to_handler:
 	movl	%eax, %ecx
 	popl	%edx
 	popl	%eax
-	JMP_NOSPEC %ecx
+	JMP_NOSPEC ecx
 #endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 9738ed2..aa5d28a 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -301,7 +301,7 @@ trace:
 	 * function tracing is enabled.
 	 */
 	movq ftrace_trace_function, %r8
-	CALL_NOSPEC %r8
+	CALL_NOSPEC r8
 	restore_mcount_regs
 
 	jmp fgraph_trace
@@ -338,6 +338,6 @@ SYM_CODE_START(return_to_handler)
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $24, %rsp
-	JMP_NOSPEC %rdi
+	JMP_NOSPEC rdi
 SYM_CODE_END(return_to_handler)
 #endif
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4742e8f..d1d7689 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -153,7 +153,7 @@ SYM_FUNC_START(csum_partial)
 	negl %ebx
 	lea 45f(%ebx,%ebx,2), %ebx
 	testl %esi, %esi
-	JMP_NOSPEC %ebx
+	JMP_NOSPEC ebx
 
 	# Handle 2-byte-aligned regions
 20:	addw (%esi), %ax
@@ -436,7 +436,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
 	andl $-32,%edx
 	lea 3f(%ebx,%ebx), %ebx
 	testl %esi, %esi 
-	JMP_NOSPEC %ebx
+	JMP_NOSPEC ebx
 1:	addl $64,%esi
 	addl $64,%edi 
 	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 15da118..90380a1 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -21,7 +21,7 @@ SYM_FUNC_START(__efi_call)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
-	CALL_NOSPEC %rdi
+	CALL_NOSPEC rdi
 	leave
 	ret
 SYM_FUNC_END(__efi_call)

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] objtool: Move the IRET hack into the arch decoder
  2020-04-28 19:11 ` [PATCH v2 09/14] objtool: Move the IRET hack into the arch decoder Peter Zijlstra
@ 2020-05-01 18:22   ` tip-bot2 for Miroslav Benes
  0 siblings, 0 replies; 39+ messages in thread
From: tip-bot2 for Miroslav Benes @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Julien Thierry, Miroslav Benes, Peter Zijlstra (Intel),
	Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     b490f45362002fef57996388e395efc974b013f4
Gitweb:        https://git.kernel.org/tip/b490f45362002fef57996388e395efc974b013f4
Author:        Miroslav Benes <mbenes@suse.cz>
AuthorDate:    Fri, 24 Apr 2020 16:30:42 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:33 +02:00

objtool: Move the IRET hack into the arch decoder

Quoting Julien:

  "And the other suggestion is my other email was that you don't even
  need to add INSN_EXCEPTION_RETURN. You can keep IRET as
  INSN_CONTEXT_SWITCH by default and x86 decoder lookups the symbol
  conaining an iret. If it's a function symbol, it can just set the type
  to INSN_OTHER so that it caries on to the next instruction after
  having handled the stack_op."

Suggested-by: Julien Thierry <jthierry@redhat.com>
Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191659.913283807@infradead.org
---
 tools/objtool/arch.h            |  1 -
 tools/objtool/arch/x86/decode.c | 28 ++++++++++++++++++----------
 tools/objtool/check.c           | 11 -----------
 tools/objtool/elf.c             |  4 ++--
 tools/objtool/elf.h             |  2 +-
 5 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index 25dd4a9..cd118eb 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -19,7 +19,6 @@ enum insn_type {
 	INSN_CALL,
 	INSN_CALL_DYNAMIC,
 	INSN_RETURN,
-	INSN_EXCEPTION_RETURN,
 	INSN_CONTEXT_SWITCH,
 	INSN_BUG,
 	INSN_NOP,
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index e26bedb..d7b5d10 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -94,6 +94,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		      rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
 		      modrm_reg = 0, sib = 0;
 	struct stack_op *op = NULL;
+	struct symbol *sym;
 
 	x86_64 = is_x86_64(elf);
 	if (x86_64 == -1)
@@ -469,17 +470,24 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		break;
 
 	case 0xcf: /* iret */
-		*type = INSN_EXCEPTION_RETURN;
-
-		ADD_OP(op) {
-			/* add $40, %rsp */
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_SP;
-			op->src.offset = 5*8;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+		/*
+		 * Handle sync_core(), which has an IRET to self.
+		 * All other IRET are in STT_NONE entry code.
+		 */
+		sym = find_symbol_containing(sec, offset);
+		if (sym && sym->type == STT_FUNC) {
+			ADD_OP(op) {
+				/* add $40, %rsp */
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_SP;
+				op->src.offset = 5*8;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
+			break;
 		}
-		break;
+
+		/* fallthrough */
 
 	case 0xca: /* retf */
 	case 0xcb: /* retf */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4f3db2f..d822858 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2320,17 +2320,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
 			break;
 
-		case INSN_EXCEPTION_RETURN:
-			/*
-			 * This handles x86's sync_core() case, where we use an
-			 * IRET to self. All 'normal' IRET instructions are in
-			 * STT_NOTYPE entry symbols.
-			 */
-			if (func)
-				break;
-
-			return 0;
-
 		case INSN_CONTEXT_SWITCH:
 			if (func && (!next_insn || !next_insn->hint)) {
 				WARN_FUNC("unsupported instruction in callable function",
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 453b723..b6349ca 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -61,7 +61,7 @@ static void rb_add(struct rb_root *tree, struct rb_node *node,
 	rb_insert_color(node, tree);
 }
 
-static struct rb_node *rb_find_first(struct rb_root *tree, const void *key,
+static struct rb_node *rb_find_first(const struct rb_root *tree, const void *key,
 			       int (*cmp)(const void *key, const struct rb_node *))
 {
 	struct rb_node *node = tree->rb_node;
@@ -189,7 +189,7 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
 	return NULL;
 }
 
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
 {
 	struct rb_node *node;
 
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 5e76ac3..9d6bb07 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -124,7 +124,7 @@ struct section *find_section_by_name(const struct elf *elf, const char *name);
 struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
 				     unsigned long offset, unsigned int len);

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] objtool: Remove INSN_STACK
  2020-04-28 19:11 ` [PATCH v2 08/14] objtool: Remove INSN_STACK Peter Zijlstra
@ 2020-05-01 18:22   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Julien Thierry, Peter Zijlstra (Intel),
	Miroslav Benes, Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     b09fb65e863733e192d4825a285b4b4998969ce0
Gitweb:        https://git.kernel.org/tip/b09fb65e863733e192d4825a285b4b4998969ce0
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 24 Apr 2020 16:18:58 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:33 +02:00

objtool: Remove INSN_STACK

With the unconditional use of handle_insn_ops(), INSN_STACK has lost
its purpose. Remove it.

Suggested-by: Julien Thierry <jthierry@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191659.854203028@infradead.org
---
 tools/objtool/arch.h            |  1 -
 tools/objtool/arch/x86/decode.c | 23 -----------------------
 tools/objtool/check.c           |  3 ---
 3 files changed, 27 deletions(-)

diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index 445b8fa..25dd4a9 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -21,7 +21,6 @@ enum insn_type {
 	INSN_RETURN,
 	INSN_EXCEPTION_RETURN,
 	INSN_CONTEXT_SWITCH,
-	INSN_STACK,
 	INSN_BUG,
 	INSN_NOP,
 	INSN_STAC,
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 97e66c7..e26bedb 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -141,7 +141,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
 
 			/* add/sub reg, %rsp */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
 				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
@@ -154,7 +153,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	case 0x50 ... 0x57:
 
 		/* push reg */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_REG;
 			op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
@@ -166,7 +164,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	case 0x58 ... 0x5f:
 
 		/* pop reg */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_POP;
 			op->dest.type = OP_DEST_REG;
@@ -178,7 +175,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	case 0x68:
 	case 0x6a:
 		/* push immediate */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_CONST;
 			op->dest.type = OP_DEST_PUSH;
@@ -196,7 +192,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 		if (modrm == 0xe4) {
 			/* and imm, %rsp */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_AND;
 				op->src.reg = CFI_SP;
@@ -215,7 +210,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			break;
 
 		/* add/sub imm, %rsp */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_ADD;
 			op->src.reg = CFI_SP;
@@ -229,7 +223,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
 
 			/* mov %rsp, reg */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
 				op->src.reg = CFI_SP;
@@ -242,7 +235,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
 
 			/* mov reg, %rsp */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
 				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
@@ -258,7 +250,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		    (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
 
 			/* mov reg, disp(%rbp) */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
 				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
@@ -270,7 +261,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
 
 			/* mov reg, disp(%rsp) */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
 				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
@@ -286,7 +276,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
 
 			/* mov disp(%rbp), reg */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_BP;
@@ -299,7 +288,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			   modrm_mod != 3 && modrm_rm == 4) {
 
 			/* mov disp(%rsp), reg */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_SP;
@@ -314,7 +302,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	case 0x8d:
 		if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
 
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				if (!insn.displacement.value) {
 					/* lea (%rsp), reg */
@@ -332,7 +319,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		} else if (rex == 0x48 && modrm == 0x65) {
 
 			/* lea disp(%rbp), %rsp */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
 				op->src.reg = CFI_BP;
@@ -350,7 +336,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			 * Restoring rsp back to its original value after a
 			 * stack realignment.
 			 */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
 				op->src.reg = CFI_R10;
@@ -368,7 +353,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			 * Restoring rsp back to its original value after a
 			 * stack realignment.
 			 */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
 				op->src.reg = CFI_R13;
@@ -382,7 +366,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 	case 0x8f:
 		/* pop to mem */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_POP;
 			op->dest.type = OP_DEST_MEM;
@@ -395,7 +378,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 	case 0x9c:
 		/* pushf */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_CONST;
 			op->dest.type = OP_DEST_PUSHF;
@@ -404,7 +386,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 	case 0x9d:
 		/* popf */
-		*type = INSN_STACK;
 		ADD_OP(op) {
 			op->src.type = OP_SRC_POPF;
 			op->dest.type = OP_DEST_MEM;
@@ -443,7 +424,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		} else if (op2 == 0xa0 || op2 == 0xa8) {
 
 			/* push fs/gs */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_CONST;
 				op->dest.type = OP_DEST_PUSH;
@@ -452,7 +432,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		} else if (op2 == 0xa1 || op2 == 0xa9) {
 
 			/* pop fs/gs */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_POP;
 				op->dest.type = OP_DEST_MEM;
@@ -469,7 +448,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		 * mov bp, sp
 		 * pop bp
 		 */
-		*type = INSN_STACK;
 		ADD_OP(op)
 			op->dest.type = OP_DEST_LEAVE;
 
@@ -537,7 +515,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		else if (modrm_reg == 6) {
 
 			/* push from mem */
-			*type = INSN_STACK;
 			ADD_OP(op) {
 				op->src.type = OP_SRC_CONST;
 				op->dest.type = OP_DEST_PUSH;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 6591c2d..4f3db2f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2339,9 +2339,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 			}
 			return 0;
 
-		case INSN_STACK:
-			break;
-
 		case INSN_STAC:
 			if (state.uaccess) {
 				WARN_FUNC("recursive UACCESS enable", sec, insn->offset);

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] objtool: Rework allocating stack_ops on decode
  2020-04-28 19:11 ` [PATCH v2 06/14] objtool: Rework allocating stack_ops on decode Peter Zijlstra
@ 2020-05-01 18:22   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Alexandre Chartre, Miroslav Benes, Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     7d989fcadd6e225a61d6490dd15bdbdfc8a53d5c
Gitweb:        https://git.kernel.org/tip/7d989fcadd6e225a61d6490dd15bdbdfc8a53d5c
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 23 Apr 2020 13:22:10 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:32 +02:00

objtool: Rework allocating stack_ops on decode

Wrap each stack_op in a macro that allocates and adds it to the list.
This simplifies trying to figure out what to do with the pre-allocated
stack_op at the end.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191659.736151601@infradead.org
---
 tools/objtool/arch/x86/decode.c | 251 ++++++++++++++++++-------------
 1 file changed, 147 insertions(+), 104 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index c45a0b4..97e66c7 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -77,6 +77,11 @@ unsigned long arch_jump_destination(struct instruction *insn)
 	return insn->offset + insn->len + insn->immediate;
 }
 
+#define ADD_OP(op) \
+	if (!(op = calloc(1, sizeof(*op)))) \
+		return -1; \
+	else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+
 int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, enum insn_type *type,
@@ -88,7 +93,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
 		      rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
 		      modrm_reg = 0, sib = 0;
-	struct stack_op *op;
+	struct stack_op *op = NULL;
 
 	x86_64 = is_x86_64(elf);
 	if (x86_64 == -1)
@@ -129,10 +134,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	if (insn.sib.nbytes)
 		sib = insn.sib.bytes[0];
 
-	op = calloc(1, sizeof(*op));
-	if (!op)
-		return -1;
-
 	switch (op1) {
 
 	case 0x1:
@@ -141,10 +142,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 			/* add/sub reg, %rsp */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 		}
 		break;
 
@@ -152,9 +155,11 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 		/* push reg */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_REG;
-		op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
-		op->dest.type = OP_DEST_PUSH;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+			op->dest.type = OP_DEST_PUSH;
+		}
 
 		break;
 
@@ -162,9 +167,11 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 		/* pop reg */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_POP;
-		op->dest.type = OP_DEST_REG;
-		op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+		ADD_OP(op) {
+			op->src.type = OP_SRC_POP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+		}
 
 		break;
 
@@ -172,8 +179,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	case 0x6a:
 		/* push immediate */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_CONST;
-		op->dest.type = OP_DEST_PUSH;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+		}
 		break;
 
 	case 0x70 ... 0x7f:
@@ -188,11 +197,13 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		if (modrm == 0xe4) {
 			/* and imm, %rsp */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_AND;
-			op->src.reg = CFI_SP;
-			op->src.offset = insn.immediate.value;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_AND;
+				op->src.reg = CFI_SP;
+				op->src.offset = insn.immediate.value;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 			break;
 		}
 
@@ -205,11 +216,13 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 		/* add/sub imm, %rsp */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_ADD;
-		op->src.reg = CFI_SP;
-		op->src.offset = insn.immediate.value * sign;
-		op->dest.type = OP_DEST_REG;
-		op->dest.reg = CFI_SP;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.immediate.value * sign;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+		}
 		break;
 
 	case 0x89:
@@ -217,10 +230,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 			/* mov %rsp, reg */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG;
-			op->src.reg = CFI_SP;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG;
+				op->src.reg = CFI_SP;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+			}
 			break;
 		}
 
@@ -228,10 +243,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 			/* mov reg, %rsp */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG;
-			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG;
+				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 			break;
 		}
 
@@ -242,21 +259,25 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 			/* mov reg, disp(%rbp) */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG;
-			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_DEST_REG_INDIRECT;
-			op->dest.reg = CFI_BP;
-			op->dest.offset = insn.displacement.value;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG;
+				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.type = OP_DEST_REG_INDIRECT;
+				op->dest.reg = CFI_BP;
+				op->dest.offset = insn.displacement.value;
+			}
 
 		} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
 
 			/* mov reg, disp(%rsp) */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG;
-			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_DEST_REG_INDIRECT;
-			op->dest.reg = CFI_SP;
-			op->dest.offset = insn.displacement.value;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG;
+				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.type = OP_DEST_REG_INDIRECT;
+				op->dest.reg = CFI_SP;
+				op->dest.offset = insn.displacement.value;
+			}
 		}
 
 		break;
@@ -266,22 +287,26 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 			/* mov disp(%rbp), reg */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG_INDIRECT;
-			op->src.reg = CFI_BP;
-			op->src.offset = insn.displacement.value;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG_INDIRECT;
+				op->src.reg = CFI_BP;
+				op->src.offset = insn.displacement.value;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			}
 
 		} else if (rex_w && !rex_b && sib == 0x24 &&
 			   modrm_mod != 3 && modrm_rm == 4) {
 
 			/* mov disp(%rsp), reg */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_REG_INDIRECT;
-			op->src.reg = CFI_SP;
-			op->src.offset = insn.displacement.value;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			ADD_OP(op) {
+				op->src.type = OP_SRC_REG_INDIRECT;
+				op->src.reg = CFI_SP;
+				op->src.offset = insn.displacement.value;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			}
 		}
 
 		break;
@@ -290,27 +315,31 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
 
 			*type = INSN_STACK;
-			if (!insn.displacement.value) {
-				/* lea (%rsp), reg */
-				op->src.type = OP_SRC_REG;
-			} else {
-				/* lea disp(%rsp), reg */
-				op->src.type = OP_SRC_ADD;
-				op->src.offset = insn.displacement.value;
+			ADD_OP(op) {
+				if (!insn.displacement.value) {
+					/* lea (%rsp), reg */
+					op->src.type = OP_SRC_REG;
+				} else {
+					/* lea disp(%rsp), reg */
+					op->src.type = OP_SRC_ADD;
+					op->src.offset = insn.displacement.value;
+				}
+				op->src.reg = CFI_SP;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
 			}
-			op->src.reg = CFI_SP;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
 
 		} else if (rex == 0x48 && modrm == 0x65) {
 
 			/* lea disp(%rbp), %rsp */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_BP;
-			op->src.offset = insn.displacement.value;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_BP;
+				op->src.offset = insn.displacement.value;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 
 		} else if (rex == 0x49 && modrm == 0x62 &&
 			   insn.displacement.value == -8) {
@@ -322,11 +351,13 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			 * stack realignment.
 			 */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_R10;
-			op->src.offset = -8;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_R10;
+				op->src.offset = -8;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 
 		} else if (rex == 0x49 && modrm == 0x65 &&
 			   insn.displacement.value == -16) {
@@ -338,11 +369,13 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			 * stack realignment.
 			 */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_R13;
-			op->src.offset = -16;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_R13;
+				op->src.offset = -16;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
 		}
 
 		break;
@@ -350,8 +383,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	case 0x8f:
 		/* pop to mem */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_POP;
-		op->dest.type = OP_DEST_MEM;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_POP;
+			op->dest.type = OP_DEST_MEM;
+		}
 		break;
 
 	case 0x90:
@@ -361,15 +396,19 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	case 0x9c:
 		/* pushf */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_CONST;
-		op->dest.type = OP_DEST_PUSHF;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSHF;
+		}
 		break;
 
 	case 0x9d:
 		/* popf */
 		*type = INSN_STACK;
-		op->src.type = OP_SRC_POPF;
-		op->dest.type = OP_DEST_MEM;
+		ADD_OP(op) {
+			op->src.type = OP_SRC_POPF;
+			op->dest.type = OP_DEST_MEM;
+		}
 		break;
 
 	case 0x0f:
@@ -405,15 +444,19 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 			/* push fs/gs */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_CONST;
-			op->dest.type = OP_DEST_PUSH;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_CONST;
+				op->dest.type = OP_DEST_PUSH;
+			}
 
 		} else if (op2 == 0xa1 || op2 == 0xa9) {
 
 			/* pop fs/gs */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_POP;
-			op->dest.type = OP_DEST_MEM;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_POP;
+				op->dest.type = OP_DEST_MEM;
+			}
 		}
 
 		break;
@@ -427,7 +470,8 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		 * pop bp
 		 */
 		*type = INSN_STACK;
-		op->dest.type = OP_DEST_LEAVE;
+		ADD_OP(op)
+			op->dest.type = OP_DEST_LEAVE;
 
 		break;
 
@@ -449,12 +493,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	case 0xcf: /* iret */
 		*type = INSN_EXCEPTION_RETURN;
 
-		/* add $40, %rsp */
-		op->src.type = OP_SRC_ADD;
-		op->src.reg = CFI_SP;
-		op->src.offset = 5*8;
-		op->dest.type = OP_DEST_REG;
-		op->dest.reg = CFI_SP;
+		ADD_OP(op) {
+			/* add $40, %rsp */
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = 5*8;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+		}
 		break;
 
 	case 0xca: /* retf */
@@ -492,8 +538,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 			/* push from mem */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_CONST;
-			op->dest.type = OP_DEST_PUSH;
+			ADD_OP(op) {
+				op->src.type = OP_SRC_CONST;
+				op->dest.type = OP_DEST_PUSH;
+			}
 		}
 
 		break;
@@ -504,11 +552,6 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 	*immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
 
-	if (*type == INSN_STACK || *type == INSN_EXCEPTION_RETURN)
-		list_add_tail(&op->list, ops_list);
-	else
-		free(op);
-
 	return 0;
 }
 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] objtool: Make handle_insn_ops() unconditional
  2020-04-28 19:11 ` [PATCH v2 07/14] objtool: Make handle_insn_ops() unconditional Peter Zijlstra
@ 2020-05-01 18:22   ` tip-bot2 for Peter Zijlstra
  2020-05-07 12:38     ` Peter Zijlstra
  0 siblings, 1 reply; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Julien Thierry, Peter Zijlstra (Intel),
	Miroslav Benes, Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     60041bcd8f5ab560dabf44dc384f58bbeb5a6a30
Gitweb:        https://git.kernel.org/tip/60041bcd8f5ab560dabf44dc384f58bbeb5a6a30
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 24 Apr 2020 16:16:41 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:32 +02:00

objtool: Make handle_insn_ops() unconditional

Now that every instruction has a list of stack_ops; we can trivially
distinquish those instructions that do not have stack_ops, their list
is empty.

This means we can now call handle_insn_ops() unconditionally.

Suggested-by: Julien Thierry <jthierry@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191659.795115188@infradead.org
---
 tools/objtool/check.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 068897d..6591c2d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2259,6 +2259,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 				return 0;
 		}
 
+		if (handle_insn_ops(insn, &state))
+			return 1;
+
 		switch (insn->type) {
 
 		case INSN_RETURN:
@@ -2318,9 +2321,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 			break;
 
 		case INSN_EXCEPTION_RETURN:
-			if (handle_insn_ops(insn, &state))
-				return 1;
-
 			/*
 			 * This handles x86's sync_core() case, where we use an
 			 * IRET to self. All 'normal' IRET instructions are in
@@ -2340,8 +2340,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 			return 0;
 
 		case INSN_STACK:
-			if (handle_insn_ops(insn, &state))
-				return 1;
 			break;
 
 		case INSN_STAC:

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] x86,smap: Fix smap_{save,restore}() alternatives
  2020-04-29 10:18       ` Peter Zijlstra
  2020-04-29 12:12         ` Brian Gerst
@ 2020-05-01 18:22         ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel), Miroslav Benes, Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     1ff865e343c2b59469d7e41d370a980a3f972c71
Gitweb:        https://git.kernel.org/tip/1ff865e343c2b59469d7e41d370a980a3f972c71
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Tue, 28 Apr 2020 19:57:59 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:31 +02:00

x86,smap: Fix smap_{save,restore}() alternatives

As reported by objtool:

  lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative modifies stack
  lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative modifies stack

the smap_{save,restore}() alternatives violate (the newly enforced)
rule on stack invariance. That is, due to there only being a single
ORC table it must be valid to any alternative. These alternatives
violate this with the direct result that unwinds will not be correct
when it hits between the PUSH and POP instructions.

Rewrite the functions to only have a conditional jump.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200429101802.GI13592@hirez.programming.kicks-ass.net
---
 arch/x86/include/asm/smap.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 27c47d1..8b58d69 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void)
 {
 	unsigned long flags;
 
-	asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
-				  X86_FEATURE_SMAP)
+	asm volatile ("# smap_save\n\t"
+		      ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+		      "pushf; pop %0; " __ASM_CLAC "\n\t"
+		      "1:"
 		      : "=rm" (flags) : : "memory", "cc");
 
 	return flags;
@@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void)
 
 static __always_inline void smap_restore(unsigned long flags)
 {
-	asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+	asm volatile ("# smap_restore\n\t"
+		      ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+		      "push %0; popf\n\t"
+		      "1:"
 		      : : "g" (flags) : "memory", "cc");
 }
 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [tip: objtool/core] objtool: Fix ORC vs alternatives
  2020-04-28 19:11 ` [PATCH v2 02/14] objtool: Fix ORC vs alternatives Peter Zijlstra
  2020-04-28 19:55   ` Josh Poimboeuf
  2020-04-29 14:33   ` Miroslav Benes
@ 2020-05-01 18:22   ` tip-bot2 for Peter Zijlstra
  2 siblings, 0 replies; 39+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-05-01 18:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Jann Horn, Peter Zijlstra (Intel),
	Miroslav Benes, Josh Poimboeuf, x86, LKML

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     7117f16bf460ef8cd132e6e80c989677397b4868
Gitweb:        https://git.kernel.org/tip/7117f16bf460ef8cd132e6e80c989677397b4868
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Tue, 28 Apr 2020 19:37:01 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 30 Apr 2020 20:14:31 +02:00

objtool: Fix ORC vs alternatives

Jann reported that (for instance) entry_64.o:general_protection has
very odd ORC data:

  0000000000000f40 <general_protection>:
  #######sp:sp+8 bp:(und) type:iret end:0
    f40:       90                      nop
  #######sp:(und) bp:(und) type:call end:0
    f41:       90                      nop
    f42:       90                      nop
  #######sp:sp+8 bp:(und) type:iret end:0
    f43:       e8 a8 01 00 00          callq  10f0 <error_entry>
  #######sp:sp+0 bp:(und) type:regs end:0
    f48:       f6 84 24 88 00 00 00    testb  $0x3,0x88(%rsp)
    f4f:       03
    f50:       74 00                   je     f52 <general_protection+0x12>
    f52:       48 89 e7                mov    %rsp,%rdi
    f55:       48 8b 74 24 78          mov    0x78(%rsp),%rsi
    f5a:       48 c7 44 24 78 ff ff    movq   $0xffffffffffffffff,0x78(%rsp)
    f61:       ff ff
    f63:       e8 00 00 00 00          callq  f68 <general_protection+0x28>
    f68:       e9 73 02 00 00          jmpq   11e0 <error_exit>
  #######sp:(und) bp:(und) type:call end:0
    f6d:       0f 1f 00                nopl   (%rax)

Note the entry at 0xf41. Josh found this was the result of commit:

  764eef4b109a ("objtool: Rewrite alt->skip_orig")

Due to the early return in validate_branch() we no longer set
insn->cfi of the original instruction stream (the NOPs at 0xf41 and
0xf42) and we'll end up with the above weirdness.

In other discussions we realized alternatives should be ORC invariant;
that is, due to there being only a single ORC table, it must be valid
for all alternatives. The easiest way to ensure this is to not allow
any stack modifications in alternatives.

When we enforce this latter observation, we get the property that the
whole alternative must have the same CFI, which we can employ to fix
the former report.

Fixes: 764eef4b109a ("objtool: Rewrite alt->skip_orig")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20200428191659.499074346@infradead.org
---
 tools/objtool/Documentation/stack-validation.txt |  7 +++-
 tools/objtool/check.c                            | 34 ++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index faa47c3..0189039 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -315,6 +315,13 @@ they mean, and suggestions for how to fix them.
       function tracing inserts additional calls, which is not obvious from the
       sources).
 
+10. file.o: warning: func()+0x5c: alternative modifies stack
+
+    This means that an alternative includes instructions that modify the
+    stack. The problem is that there is only one ORC unwind table, this means
+    that the ORC unwind entries must be valid for each of the alternatives.
+    The easiest way to enforce this is to ensure alternatives do not contain
+    any ORC entries, which in turn implies the above constraint.
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4da6bfb..fa9bf36 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1983,6 +1983,11 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
 	list_for_each_entry(op, &insn->stack_ops, list) {
 		int res;
 
+		if (insn->alt_group) {
+			WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
+			return -1;
+		}
+
 		res = update_cfi_state(insn, &state->cfi, op);
 		if (res)
 			return res;
@@ -2150,6 +2155,30 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct
 }
 
 /*
+ * Alternatives should not contain any ORC entries, this in turn means they
+ * should not contain any CFI ops, which implies all instructions should have
+ * the same same CFI state.
+ *
+ * It is possible to constuct alternatives that have unreachable holes that go
+ * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
+ * states which then results in ORC entries, which we just said we didn't want.
+ *
+ * Avoid them by copying the CFI entry of the first instruction into the whole
+ * alternative.
+ */
+static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+{
+	struct instruction *first_insn = insn;
+	int alt_group = insn->alt_group;
+
+	sec_for_each_insn_continue(file, insn) {
+		if (insn->alt_group != alt_group)
+			break;
+		insn->cfi = first_insn->cfi;
+	}
+}
+
+/*
  * Follow the branch starting at the given instruction, and recursively follow
  * any other branches (jumps).  Meanwhile, track the frame pointer state at
  * each instruction and validate all the rules described in
@@ -2200,7 +2229,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
 		insn->visited |= visited;
 
-		if (!insn->ignore_alts) {
+		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
 			bool skip_orig = false;
 
 			list_for_each_entry(alt, &insn->alts, list) {
@@ -2215,6 +2244,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 				}
 			}
 
+			if (insn->alt_group)
+				fill_alternative_cfi(file, insn);
+
 			if (skip_orig)
 				return 0;
 		}

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [tip: objtool/core] objtool: Make handle_insn_ops() unconditional
  2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2020-05-07 12:38     ` Peter Zijlstra
  0 siblings, 0 replies; 39+ messages in thread
From: Peter Zijlstra @ 2020-05-07 12:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-tip-commits, Julien Thierry, Miroslav Benes, Josh Poimboeuf, x86

On Fri, May 01, 2020 at 06:22:21PM -0000, tip-bot2 for Peter Zijlstra wrote:
> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
> index 068897d..6591c2d 100644
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -2259,6 +2259,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
>  				return 0;
>  		}
>  
> +		if (handle_insn_ops(insn, &state))
> +			return 1;
> +
>  		switch (insn->type) {
>  
>  		case INSN_RETURN:

Fun question; when an instruction has both a hint and ops, who should
win? I'm currently in that situation and I'd like the hint to win, but
is that 'right' ?

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2020-05-07 12:39 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-28 19:11 [PATCH v2 00/14] objtool vs retpoline Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 01/14] objtool: Allow branches within the same alternative Peter Zijlstra
2020-04-28 19:53   ` Josh Poimboeuf
2020-04-28 19:11 ` [PATCH v2 02/14] objtool: Fix ORC vs alternatives Peter Zijlstra
2020-04-28 19:55   ` Josh Poimboeuf
2020-04-29 14:33   ` Miroslav Benes
2020-04-29 15:51     ` Peter Zijlstra
2020-04-29 16:41       ` Miroslav Benes
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 03/14] x86,smap: Fix smap_{save,restore}() alternatives Peter Zijlstra
2020-04-29  0:54   ` Brian Gerst
2020-04-29  8:30     ` Peter Zijlstra
2020-04-29 10:18       ` Peter Zijlstra
2020-04-29 12:12         ` Brian Gerst
2020-05-01 18:22         ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 04/14] objtool: is_fentry_call() crashes if call has no destination Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 05/14] objtool: UNWIND_HINT_RET_OFFSET should not check registers Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 06/14] objtool: Rework allocating stack_ops on decode Peter Zijlstra
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 07/14] objtool: Make handle_insn_ops() unconditional Peter Zijlstra
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-05-07 12:38     ` Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 08/14] objtool: Remove INSN_STACK Peter Zijlstra
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 09/14] objtool: Move the IRET hack into the arch decoder Peter Zijlstra
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Miroslav Benes
2020-04-28 19:11 ` [PATCH v2 10/14] objtool: Add support for intra-function calls Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 11/14] x86/speculation: Change FILL_RETURN_BUFFER to work with objtool Peter Zijlstra
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 12/14] x86: Simplify retpoline declaration Peter Zijlstra
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 13/14] x86: Change {JMP,CALL}_NOSPEC argument Peter Zijlstra
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-04-28 19:11 ` [PATCH v2 14/14] x86/retpoline: Fix retpoline unwind Peter Zijlstra
2020-05-01 18:22   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2020-04-28 20:17 ` [PATCH v2 00/14] objtool vs retpoline Josh Poimboeuf
2020-04-29 10:19 ` [PATCH v2.1 01-A/14] objtool: Remove check preventing branches within alternative Peter Zijlstra
2020-04-29 10:21 ` [PATCH v2.1 01-B/14] objtool: Uniquely identify alternative instruction groups Peter Zijlstra
2020-04-29 16:46 ` [PATCH v2 00/14] objtool vs retpoline Miroslav Benes

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.