linux-kernel.vger.kernel.org archive mirror
* [PATCH 0/9] x86,objtool: Optimize !RETPOLINE
@ 2021-03-12 17:16 Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 1/9] x86/retpoline: Simplify retpolines Peter Zijlstra
                   ` (8 more replies)
  0 siblings, 9 replies; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

Hi,

Now that Juergen's paravirt rework, which included the negative alternative
stuff, landed in tip, here's a respin of my retpoline patches.

The main feature is replacing the compiler generated (tail) calls to
__x86_indirect_thunk_\reg with an ALTERNATIVE that patches them back
into regular indirect call instructions when retpolines are not
required, eliding the double jump.
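
A minimal sketch of the end result for one register (encodings
illustrative; the rewrite itself is done by objtool plus boot-time
alternative patching, see the later patches):

  compiler output:          e8 xx xx xx xx   call   __x86_indirect_thunk_rax

  patched for !RETPOLINE:   ff d0            call   *%rax
                            0f 1f 00         nopl   (%rax)    ; pad to 5 bytes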

Patches rely on tip/objtool/core and tip/x86/alternatives.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 1/9] x86/retpoline: Simplify retpolines
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 2/9] objtool: Correctly handle retpoline thunk calls Peter Zijlstra
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

Due to commit c9c324dc22aa ("objtool: Support stack layout changes
in alternatives"), it is possible to simplify the retpolines.

Currently our retpolines consist of 2 symbols,
__x86_indirect_thunk_\reg, which is the compiler target, and
__x86_retpoline_\reg, which is the actual retpoline. Both are
consecutive in code and aligned such that for any one register they
both live in the same cacheline:

  0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0                   jmpq   *%rax
   2:   90                      nop
   3:   90                      nop
   4:   90                      nop

  0000000000000005 <__x86_retpoline_rax>:
   5:   e8 07 00 00 00          callq  11 <__x86_retpoline_rax+0xc>
   a:   f3 90                   pause
   c:   0f ae e8                lfence
   f:   eb f9                   jmp    a <__x86_retpoline_rax+0x5>
  11:   48 89 04 24             mov    %rax,(%rsp)
  15:   c3                      retq
  16:   66 2e 0f 1f 84 00 00 00 00 00   nopw   %cs:0x0(%rax,%rax,1)

The thunk is an alternative_2, where one option is a jmp to the
retpoline. This was done so that objtool didn't need to deal with
alternatives with stack ops. But that problem has been solved, so now
it is possible to fold the entire retpoline into the alternative to
simplify and consolidate unused bytes:

  0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0                   jmpq   *%rax
   2:   90                      nop
   3:   90                      nop
   4:   90                      nop
   5:   90                      nop
   6:   90                      nop
   7:   90                      nop
   8:   90                      nop
   9:   90                      nop
   a:   90                      nop
   b:   90                      nop
   c:   90                      nop
   d:   90                      nop
   e:   90                      nop
   f:   90                      nop
  10:   90                      nop
  11:   66 66 2e 0f 1f 84 00 00 00 00 00        data16 nopw %cs:0x0(%rax,%rax,1)
  1c:   0f 1f 40 00             nopl   0x0(%rax)

Notice that since the longest alternative sequence is now:

   0:   e8 07 00 00 00          callq  c <.altinstr_replacement+0xc>
   5:   f3 90                   pause
   7:   0f ae e8                lfence
   a:   eb f9                   jmp    5 <.altinstr_replacement+0x5>
   c:   48 89 04 24             mov    %rax,(%rsp)
  10:   c3                      retq

17 bytes, we have 15 bytes of NOP padding at the end of our 32 byte slot.
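
For illustration, after boot-time alternative patching the start of the
32 byte slot ends up as one of (remaining bytes are NOPs; encodings
illustrative):

  default (!RETPOLINE):        jmpq   *%rax

  X86_FEATURE_RETPOLINE:       callq  1f
                               pause; lfence; jmp <pause>
                           1:  mov    %rax,(%rsp)
                               retq

  X86_FEATURE_RETPOLINE_AMD:   lfence
                               jmpq   *%rax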

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/asm-prototypes.h |    7 -------
 arch/x86/include/asm/nospec-branch.h  |    6 +++---
 arch/x86/lib/retpoline.S              |   34 +++++++++++++++++-----------------
 tools/objtool/check.c                 |    3 +--
 4 files changed, 21 insertions(+), 29 deletions(-)

--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void);
 #define DECL_INDIRECT_THUNK(reg) \
 	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
 
-#define DECL_RETPOLINE(reg) \
-	extern asmlinkage void __x86_retpoline_ ## reg (void);
-
 #undef GEN
 #define GEN(reg) DECL_INDIRECT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
-#undef GEN
-#define GEN(reg) DECL_RETPOLINE(reg)
-#include <asm/GEN-for-each-reg.h>
-
 #endif /* CONFIG_RETPOLINE */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -81,7 +81,7 @@
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
-		      __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
 		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	jmp	*%\reg
@@ -91,7 +91,7 @@
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
-		      __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
 		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	call	*%\reg
@@ -129,7 +129,7 @@
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	"call __x86_retpoline_%V[thunk_target]\n",		\
+	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
 	X86_FEATURE_RETPOLINE,					\
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,27 +10,31 @@
 #include <asm/unwind_hints.h>
 #include <asm/frame.h>
 
-.macro THUNK reg
-	.section .text.__x86.indirect_thunk
-
-	.align 32
-SYM_FUNC_START(__x86_indirect_thunk_\reg)
-	JMP_NOSPEC \reg
-SYM_FUNC_END(__x86_indirect_thunk_\reg)
-
-SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+.macro RETPOLINE reg
 	ANNOTATE_INTRA_FUNCTION_CALL
-	call	.Ldo_rop_\@
+	call    .Ldo_rop_\@
 .Lspec_trap_\@:
 	UNWIND_HINT_EMPTY
 	pause
 	lfence
-	jmp	.Lspec_trap_\@
+	jmp .Lspec_trap_\@
 .Ldo_rop_\@:
-	mov	%\reg, (%_ASM_SP)
+	mov     %\reg, (%_ASM_SP)
 	UNWIND_HINT_FUNC
 	ret
-SYM_FUNC_END(__x86_retpoline_\reg)
+.endm
+
+.macro THUNK reg
+	.section .text.__x86.indirect_thunk
+
+	.align 32
+SYM_FUNC_START(__x86_indirect_thunk_\reg)
+
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+		      __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+
+SYM_FUNC_END(__x86_indirect_thunk_\reg)
 
 .endm
 
@@ -48,7 +52,6 @@ SYM_FUNC_END(__x86_retpoline_\reg)
 
 #define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
 #define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define EXPORT_RETPOLINE(reg)  __EXPORT_THUNK(__x86_retpoline_ ## reg)
 
 #undef GEN
 #define GEN(reg) THUNK reg
@@ -58,6 +61,3 @@ SYM_FUNC_END(__x86_retpoline_\reg)
 #define GEN(reg) EXPORT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
-#undef GEN
-#define GEN(reg) EXPORT_RETPOLINE(reg)
-#include <asm/GEN-for-each-reg.h>
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -800,8 +800,7 @@ static int add_jump_destinations(struct
 		} else if (reloc->sym->type == STT_SECTION) {
 			dest_sec = reloc->sym->sec;
 			dest_off = arch_dest_reloc_offset(reloc->addend);
-		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
-			   !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
+		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
 			/*
 			 * Retpoline jumps are really dynamic jumps in
 			 * disguise, so convert them accordingly.



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 2/9] objtool: Correctly handle retpoline thunk calls
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 1/9] x86/retpoline: Simplify retpolines Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  2021-03-16 21:19   ` Josh Poimboeuf
  2021-03-12 17:16 ` [PATCH 3/9] objtool: Per arch retpoline naming Peter Zijlstra
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

Just like JMP handling, convert a direct CALL to a retpoline thunk
into a retpoline safe indirect CALL.
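
For example (register choice illustrative), a compiler generated

  e8 xx xx xx xx          callq  __x86_indirect_thunk_rax

is now treated the same as

  ff d0                   callq  *%rax

i.e. as INSN_CALL_DYNAMIC with retpoline_safe set, mirroring the
existing JMP conversion in add_jump_destinations().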

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c |   12 ++++++++++++
 1 file changed, 12 insertions(+)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -953,6 +953,18 @@ static int add_call_destinations(struct
 					  dest_off);
 				return -1;
 			}
+
+		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+			/*
+			 * Retpoline calls are really dynamic calls in
+			 * disguise, so convert them accodingly.
+			 */
+			insn->type = INSN_CALL_DYNAMIC;
+			insn->retpoline_safe = true;
+
+			remove_insn_ops(insn);
+			continue;
+
 		} else
 			insn->call_dest = reloc->sym;
 



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 3/9] objtool: Per arch retpoline naming
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 1/9] x86/retpoline: Simplify retpolines Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 2/9] objtool: Correctly handle retpoline thunk calls Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 4/9] objtool: Fix static_call list generation Peter Zijlstra
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

The __x86_indirect_ naming is obviously not generic. Shorten the
matched prefix to allow matching some additional magic names later.
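
A sketch of what the shorter 15 byte prefix matches (the second name is
introduced later in this series):

  "__x86_indirect_thunk_rax"     /* matches "__x86_indirect_" */
  "__x86_indirect_alt_call_rax"  /* will also match           */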

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/arch/x86/decode.c      |    5 +++++
 tools/objtool/check.c                |    9 +++++++--
 tools/objtool/include/objtool/arch.h |    2 ++
 3 files changed, 14 insertions(+), 2 deletions(-)

--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -690,3 +690,8 @@ int arch_decode_hint_reg(struct instruct
 
 	return 0;
 }
+
+bool arch_is_retpoline(struct symbol *sym)
+{
+	return !strncmp(sym->name, "__x86_indirect_", 15);
+}
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -850,6 +850,11 @@ static int add_ignore_alternatives(struc
 	return 0;
 }
 
+__weak bool arch_is_retpoline(struct symbol *sym)
+{
+	return false;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -872,7 +877,7 @@ static int add_jump_destinations(struct
 		} else if (reloc->sym->type == STT_SECTION) {
 			dest_sec = reloc->sym->sec;
 			dest_off = arch_dest_reloc_offset(reloc->addend);
-		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+		} else if (arch_is_retpoline(reloc->sym)) {
 			/*
 			 * Retpoline jumps are really dynamic jumps in
 			 * disguise, so convert them accordingly.
@@ -1026,7 +1031,7 @@ static int add_call_destinations(struct
 				return -1;
 			}
 
-		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+		} else if (arch_is_retpoline(reloc->sym)) {
 			/*
 			 * Retpoline calls are really dynamic calls in
 			 * disguise, so convert them accodingly.
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -85,4 +85,6 @@ const char *arch_nop_insn(int len);
 
 int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
 
+bool arch_is_retpoline(struct symbol *sym);
+
 #endif /* _ARCH_H */



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 4/9] objtool: Fix static_call list generation
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
                   ` (2 preceding siblings ...)
  2021-03-12 17:16 ` [PATCH 3/9] objtool: Per arch retpoline naming Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  2021-03-17  3:18   ` Josh Poimboeuf
  2021-03-12 17:16 ` [PATCH 5/9] objtool: Rework rebuild_reloc logic Peter Zijlstra
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

Currently objtool generates tail call entries in
add_jump_destinations() but waits until validate_branch() to generate
the regular call entries. Move these to add_call_destinations() for
consistency.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c |   17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -974,6 +974,11 @@ static int add_call_destinations(struct
 		} else
 			insn->call_dest = reloc->sym;
 
+		if (insn->call_dest && insn->call_dest->static_call_tramp) {
+			list_add_tail(&insn->static_call_node,
+				      &file->static_call_list);
+		}
+
 		/*
 		 * Many compilers cannot disable KCOV with a function attribute
 		 * so they need a little help, NOP out any KCOV calls from noinstr
@@ -1701,6 +1706,9 @@ static int decode_sections(struct objtoo
 	if (ret)
 		return ret;
 
+	/*
+	 * Must be before add_{jump_call}_desetination.
+	 */
 	ret = read_static_call_tramps(file);
 	if (ret)
 		return ret;
@@ -1717,6 +1725,10 @@ static int decode_sections(struct objtoo
 	if (ret)
 		return ret;
 
+	/*
+	 * Must be before add_call_destination(); it changes INSN_CALL to
+	 * INSN_JUMP.
+	 */
 	ret = read_intra_function_calls(file);
 	if (ret)
 		return ret;
@@ -2659,11 +2671,6 @@ static int validate_branch(struct objtoo
 			if (dead_end_function(file, insn->call_dest))
 				return 0;
 
-			if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
-				list_add_tail(&insn->static_call_node,
-					      &file->static_call_list);
-			}
-
 			break;
 
 		case INSN_JUMP_CONDITIONAL:



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
                   ` (3 preceding siblings ...)
  2021-03-12 17:16 ` [PATCH 4/9] objtool: Fix static_call list generation Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  2021-03-17  3:34   ` Josh Poimboeuf
  2021-03-12 17:16 ` [PATCH 6/9] objtool: Add elf_create_undef_symbol() Peter Zijlstra
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

Instead of manually calling elf_rebuild_reloc_section() on sections
we've called elf_add_reloc() on, have elf_write() DTRT.

This makes it easier to add relocations in random places without
carefully tracking when we're done and which reloc section needs to be
rebuilt.
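
A caller sketch (simplified from create_static_call_sections() and
orc_create() below):

  before:
	elf_add_reloc(file->elf, reloc);
	...
	if (elf_rebuild_reloc_section(file->elf, reloc_sec))
		return -1;

  after:
	elf_add_reloc(file->elf, reloc);
	/* elf_write() rebuilds any reloc section that was touched */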

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c               |    3 ---
 tools/objtool/elf.c                 |   13 ++++++++++++-
 tools/objtool/include/objtool/elf.h |    2 +-
 tools/objtool/orc_gen.c             |    3 ---
 4 files changed, 13 insertions(+), 8 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -542,9 +542,6 @@ static int create_static_call_sections(s
 		idx++;
 	}
 
-	if (elf_rebuild_reloc_section(file->elf, reloc_sec))
-		return -1;
-
 	return 0;
 }
 
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, stru
 
 	list_add_tail(&reloc->list, &sec->reloc_list);
 	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
+	sec->rereloc = true;
 }
 
 static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
@@ -558,7 +560,9 @@ static int read_relocs(struct elf *elf)
 				return -1;
 			}
 
-			elf_add_reloc(elf, reloc);
+			list_add_tail(&reloc->list, &sec->reloc_list);
+			elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
 			nr_reloc++;
 		}
 		max_reloc = max(max_reloc, nr_reloc);
@@ -890,6 +894,8 @@ int elf_rebuild_reloc_section(struct elf
 	case SHT_RELA: return elf_rebuild_rela_reloc_section(sec, nr);
 	default:       return -1;
 	}
+
+	sec->rereloc = false;
 }
 
 int elf_write_insn(struct elf *elf, struct section *sec,
@@ -944,6 +950,11 @@ int elf_write(struct elf *elf)
 	struct section *sec;
 	Elf_Scn *s;
 
+	list_for_each_entry(sec, &elf->sections, list) {
+		if (sec->reloc && sec->reloc->rereloc)
+			elf_rebuild_reloc_section(elf, sec->reloc);
+	}
+
 	/* Update section headers for changed sections: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -39,7 +39,7 @@ struct section {
 	char *name;
 	int idx;
 	unsigned int len;
-	bool changed, text, rodata, noinstr;
+	bool changed, text, rodata, noinstr, rereloc;
 };
 
 struct symbol {
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -254,8 +254,5 @@ int orc_create(struct objtool_file *file
 			return -1;
 	}
 
-	if (elf_rebuild_reloc_section(file->elf, ip_rsec))
-		return -1;
-
 	return 0;
 }



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 6/9] objtool: Add elf_create_undef_symbol()
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
                   ` (4 preceding siblings ...)
  2021-03-12 17:16 ` [PATCH 5/9] objtool: Rework rebuild_reloc logic Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  2021-03-17 13:52   ` Miroslav Benes
  2021-03-12 17:16 ` [PATCH 7/9] objtool: Allow archs to rewrite retpolines Peter Zijlstra
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

Allow objtool to create undefined symbols; this allows creating
relocations to symbols not currently in the symbol table.
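
Intended usage sketch (a later patch in this series does exactly this
for the __x86_indirect_alt_* symbols):

	sym = find_symbol_by_name(file->elf, name);
	if (!sym) {
		sym = elf_create_undef_symbol(file->elf, name);
		if (!sym)
			return -1;
	}
	/* new relocations can now reference sym */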

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/elf.c                 |  180 +++++++++++++++++++++++++++---------
 tools/objtool/include/objtool/elf.h |    1 
 2 files changed, 139 insertions(+), 42 deletions(-)

--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -316,12 +316,60 @@ static int read_sections(struct elf *elf
 	return 0;
 }
 
+static bool elf_symbol_add(struct elf *elf, struct symbol *sym, Elf32_Word shndx)
+{
+	struct list_head *entry;
+	struct rb_node *pnode;
+
+	sym->type = GELF_ST_TYPE(sym->sym.st_info);
+	sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+	if ((sym->sym.st_shndx > SHN_UNDEF &&
+	     sym->sym.st_shndx < SHN_LORESERVE) ||
+	    (shndx != SHN_XINDEX && sym->sym.st_shndx == SHN_XINDEX)) {
+		if (sym->sym.st_shndx != SHN_XINDEX)
+			shndx = sym->sym.st_shndx;
+
+		sym->sec = find_section_by_index(elf, shndx);
+		if (!sym->sec) {
+			WARN("couldn't find section for symbol %s",
+			     sym->name);
+			return false;
+		}
+		if (sym->type == STT_SECTION) {
+			sym->name = sym->sec->name;
+			sym->sec->sym = sym;
+		}
+	} else
+		sym->sec = find_section_by_index(elf, 0);
+
+	sym->offset = sym->sym.st_value;
+	sym->len = sym->sym.st_size;
+
+	rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
+	pnode = rb_prev(&sym->node);
+	if (pnode)
+		entry = &rb_entry(pnode, struct symbol, node)->list;
+	else
+		entry = &sym->sec->symbol_list;
+	list_add(&sym->list, entry);
+	elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+	elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+
+	/*
+	 * Don't store empty STT_NOTYPE symbols in the rbtree.  They
+	 * can exist within a function, confusing the sorting.
+	 */
+	if (!sym->len)
+		rb_erase(&sym->node, &sym->sec->symbol_tree);
+
+	return true;
+}
+
 static int read_symbols(struct elf *elf)
 {
 	struct section *symtab, *symtab_shndx, *sec;
 	struct symbol *sym, *pfunc;
-	struct list_head *entry;
-	struct rb_node *pnode;
 	int symbols_nr, i;
 	char *coldstr;
 	Elf_Data *shndx_data = NULL;
@@ -366,47 +414,11 @@ static int read_symbols(struct elf *elf)
 			goto err;
 		}
 
-		sym->type = GELF_ST_TYPE(sym->sym.st_info);
-		sym->bind = GELF_ST_BIND(sym->sym.st_info);
+		if (!shndx_data)
+			shndx = SHN_XINDEX;
 
-		if ((sym->sym.st_shndx > SHN_UNDEF &&
-		     sym->sym.st_shndx < SHN_LORESERVE) ||
-		    (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
-			if (sym->sym.st_shndx != SHN_XINDEX)
-				shndx = sym->sym.st_shndx;
-
-			sym->sec = find_section_by_index(elf, shndx);
-			if (!sym->sec) {
-				WARN("couldn't find section for symbol %s",
-				     sym->name);
-				goto err;
-			}
-			if (sym->type == STT_SECTION) {
-				sym->name = sym->sec->name;
-				sym->sec->sym = sym;
-			}
-		} else
-			sym->sec = find_section_by_index(elf, 0);
-
-		sym->offset = sym->sym.st_value;
-		sym->len = sym->sym.st_size;
-
-		rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
-		pnode = rb_prev(&sym->node);
-		if (pnode)
-			entry = &rb_entry(pnode, struct symbol, node)->list;
-		else
-			entry = &sym->sec->symbol_list;
-		list_add(&sym->list, entry);
-		elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
-		elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
-
-		/*
-		 * Don't store empty STT_NOTYPE symbols in the rbtree.  They
-		 * can exist within a function, confusing the sorting.
-		 */
-		if (!sym->len)
-			rb_erase(&sym->node, &sym->sec->symbol_tree);
+		if (!elf_symbol_add(elf, sym, shndx))
+			goto err;
 	}
 
 	if (stats)
@@ -640,6 +652,90 @@ struct elf *elf_open_read(const char *na
 	return NULL;
 }
 
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+{
+	struct section *strtab, *symtab;
+	struct symbol *sym;
+	Elf_Scn *s;
+	Elf_Data *data;
+
+	sym = malloc(sizeof(*sym));
+	if (!sym) {
+		perror("malloc");
+		return NULL;
+	}
+	memset(sym, 0, sizeof(*sym));
+
+	sym->name = strdup(name);
+
+	strtab = find_section_by_name(elf, ".strtab");
+	if (!strtab) {
+		WARN("can't find .strtab");
+		return NULL;
+	}
+
+	s = elf_getscn(elf->elf, strtab->idx);
+	if (!s) {
+		WARN_ELF("elf_getscn");
+		return NULL;
+	}
+
+	data = elf_newdata(s);
+	if (!data) {
+		WARN_ELF("elf_newdata");
+		return NULL;
+	}
+
+	data->d_buf = sym->name;
+	data->d_size = strlen(sym->name) + 1;
+	data->d_align = 1;
+
+	sym->sym.st_name = strtab->len;
+	sym->sym.st_info = 0x10; /* STB_GLOBAL, STT_NOTYPE */
+	// st_other 0
+	// st_shndx 0
+	// st_value 0
+	// st_size 0
+
+	strtab->len += data->d_size;
+	strtab->changed = true;
+
+
+	symtab = find_section_by_name(elf, ".symtab");
+	if (!symtab) {
+		WARN("can't find .symtab");
+		return NULL;
+	}
+
+	s = elf_getscn(elf->elf, symtab->idx);
+	if (!s) {
+		WARN_ELF("elf_getscn");
+		return NULL;
+	}
+
+	data = elf_newdata(s);
+	if (!data) {
+		WARN_ELF("elf_newdata");
+		return NULL;
+	}
+
+	data->d_buf = &sym->sym;
+	data->d_size = sizeof(sym->sym);
+	data->d_align = 1;
+
+	sym->idx = symtab->len / sizeof(sym->sym);
+
+	symtab->len += data->d_size;
+	symtab->changed = true;
+
+	if (!elf_symbol_add(elf, sym, SHN_XINDEX)) {
+		WARN("elf_symbol_add");
+		return NULL;
+	}
+
+	return sym;
+}
+
 struct section *elf_create_section(struct elf *elf, const char *name,
 				   unsigned int sh_flags, size_t entsize, int nr)
 {
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -128,6 +128,7 @@ int elf_write_insn(struct elf *elf, stru
 		   unsigned long offset, unsigned int len,
 		   const char *insn);
 int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
 int elf_write(struct elf *elf);
 void elf_close(struct elf *elf);
 



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 7/9] objtool: Allow archs to rewrite retpolines
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
                   ` (5 preceding siblings ...)
  2021-03-12 17:16 ` [PATCH 6/9] objtool: Add elf_create_undef_symbol() Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 8/9] objtool: Skip magical retpoline .altinstr_replacement Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 9/9] objtool,x86: Rewrite retpoline thunk calls Peter Zijlstra
  8 siblings, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

When retpolines are employed, compilers typically emit calls to
retpoline thunks. Objtool recognises these calls and marks them as
dynamic calls.

Provide infrastructure for architectures to rewrite/augment what the
compiler wrote for us.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c                |   23 +++++++++++++++++++++--
 tools/objtool/include/objtool/arch.h |    3 +++
 2 files changed, 24 insertions(+), 2 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -852,6 +852,14 @@ __weak bool arch_is_retpoline(struct sym
 	return false;
 }
 
+
+__weak int arch_rewrite_retpoline(struct objtool_file *file,
+				  struct instruction *insn,
+				  struct reloc *reloc)
+{
+	return 0;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -885,6 +893,9 @@ static int add_jump_destinations(struct
 				insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
 
 			insn->retpoline_safe = true;
+
+			arch_rewrite_retpoline(file, insn, reloc);
+
 			continue;
 		} else if (insn->func) {
 			/* internal or external sibling call (with reloc) */
@@ -1036,6 +1047,8 @@ static int add_call_destinations(struct
 			insn->type = INSN_CALL_DYNAMIC;
 			insn->retpoline_safe = true;
 
+			arch_rewrite_retpoline(file, insn, reloc);
+
 			remove_insn_ops(insn);
 			continue;
 
@@ -1212,6 +1225,8 @@ static int handle_group_alt(struct objto
 		dest_off = arch_jump_destination(insn);
 		if (dest_off == special_alt->new_off + special_alt->new_len)
 			insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
+		else
+			insn->jump_dest = find_insn(file, insn->sec, dest_off);
 
 		if (!insn->jump_dest) {
 			WARN_FUNC("can't find alternative jump destination",
@@ -1797,11 +1812,15 @@ static int decode_sections(struct objtoo
 	if (ret)
 		return ret;
 
-	ret = add_jump_destinations(file);
+	/*
+	 * Must be before add_{jump,call}_destination; for they can add
+	 * magic alternatives.
+	 */
+	ret = add_special_section_alts(file);
 	if (ret)
 		return ret;
 
-	ret = add_special_section_alts(file);
+	ret = add_jump_destinations(file);
 	if (ret)
 		return ret;
 
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -82,6 +82,9 @@ unsigned long arch_jump_destination(stru
 unsigned long arch_dest_reloc_offset(int addend);
 
 const char *arch_nop_insn(int len);
+int arch_rewrite_retpoline(struct objtool_file *file,
+			   struct instruction *insn,
+			   struct reloc *reloc);
 
 int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
 



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 8/9] objtool: Skip magical retpoline .altinstr_replacement
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
                   ` (6 preceding siblings ...)
  2021-03-12 17:16 ` [PATCH 7/9] objtool: Allow archs to rewrite retpolines Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  2021-03-12 17:16 ` [PATCH 9/9] objtool,x86: Rewrite retpoline thunk calls Peter Zijlstra
  8 siblings, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

When the .altinstr_replacement is a retpoline, skip the alternative.
We already special-case retpolines anyway.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/special.c |   12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -106,6 +106,14 @@ static int get_alt_entry(struct elf *elf
 			return -1;
 		}
 
+		/*
+		 * Skip retpoline .altinstr_replacement... we already rewrite the
+		 * instructions for retpolines anyway, see arch_is_retpoline()
+		 * usage in add_{call,jump}_destinations().
+		 */
+		if (arch_is_retpoline(new_reloc->sym))
+			return 1;
+
 		alt->new_sec = new_reloc->sym->sec;
 		alt->new_off = (unsigned int)new_reloc->addend;
 
@@ -154,7 +162,9 @@ int special_get_alts(struct elf *elf, st
 			memset(alt, 0, sizeof(*alt));
 
 			ret = get_alt_entry(elf, entry, sec, idx, alt);
-			if (ret)
+			if (ret > 0)
+				continue;
+			if (ret < 0)
 				return ret;
 
 			list_add_tail(&alt->list, alts);



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 9/9] objtool,x86: Rewrite retpoline thunk calls
  2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
                   ` (7 preceding siblings ...)
  2021-03-12 17:16 ` [PATCH 8/9] objtool: Skip magical retpoline .altinstr_replacement Peter Zijlstra
@ 2021-03-12 17:16 ` Peter Zijlstra
  8 siblings, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-12 17:16 UTC (permalink / raw)
  To: x86, jpoimboe, jgross, mbenes; +Cc: linux-kernel, peterz

When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
indirect call, have objtool rewrite it to:

	ALTERNATIVE "call __x86_indirect_thunk_\reg",
		    "call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE)

Additionally, in order to not emit endless identical
.altinstr_replacement chunks, use a global symbol for them, see
__x86_indirect_alt_*.
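
The net effect at a call site, sketched for %rax (encodings
illustrative):

  built object:        e8 xx xx xx xx   call   __x86_indirect_thunk_rax
                       plus an .altinstructions entry pointing at
                       __x86_indirect_alt_call_rax

  boot, !RETPOLINE:    ff d0            call   *%rax
                       0f 1f 00         nopl   (%rax)    ; 3 bytes padding

  boot, RETPOLINE:     call __x86_indirect_thunk_rax left as-is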

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/asm-prototypes.h |   12 ++
 arch/x86/lib/retpoline.S              |   33 +++++++-
 tools/objtool/arch/x86/decode.c       |  139 ++++++++++++++++++++++++++++++++++
 3 files changed, 181 insertions(+), 3 deletions(-)

--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void);
 
 #ifdef CONFIG_RETPOLINE
 
-#define DECL_INDIRECT_THUNK(reg) \
+#undef GEN
+#define GEN(reg) \
 	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) \
+	extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
 
 #undef GEN
-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#define GEN(reg) \
+	extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
 #include <asm/GEN-for-each-reg.h>
 
 #endif /* CONFIG_RETPOLINE */
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,6 +10,8 @@
 #include <asm/unwind_hints.h>
 #include <asm/frame.h>
 
+	.section .text.__x86.indirect_thunk
+
 .macro RETPOLINE reg
 	ANNOTATE_INTRA_FUNCTION_CALL
 	call    .Ldo_rop_\@
@@ -25,7 +27,6 @@
 .endm
 
 .macro THUNK reg
-	.section .text.__x86.indirect_thunk
 
 	.align 32
 SYM_FUNC_START(__x86_indirect_thunk_\reg)
@@ -38,6 +39,24 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 
 .endm
 
+.macro CALL_THUNK reg
+
+	.align 1
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+	ANNOTATE_RETPOLINE_SAFE
+1:	call	*%\reg
+2:	.nops	5-(2b-1b)
+SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+	ANNOTATE_RETPOLINE_SAFE
+1:	jmp	*%\reg
+2:	.nops	5-(2b-1b)
+SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+
+.endm
+
 /*
  * Despite being an assembler file we can't just use .irp here
  * because __KSYM_DEPS__ only uses the C preprocessor and would
@@ -61,3 +80,15 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 #define GEN(reg) EXPORT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
+#undef GEN
+#define GEN(reg) CALL_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -16,6 +16,7 @@
 #include <objtool/elf.h>
 #include <objtool/arch.h>
 #include <objtool/warn.h>
+#include <arch/elf.h>
 
 static int is_x86_64(const struct elf *elf)
 {
@@ -655,6 +656,144 @@ const char *arch_nop_insn(int len)
 	return nops[len-1];
 }
 
+/* asm/alternative.h ? */
+
+#define ALTINSTR_FLAG_INV	(1 << 15)
+#define ALT_NOT(feat)		((feat) | ALTINSTR_FLAG_INV)
+
+struct alt_instr {
+	s32 instr_offset;	/* original instruction */
+	s32 repl_offset;	/* offset to replacement instruction */
+	u16 cpuid;		/* cpuid bit set for replacement */
+	u8  instrlen;		/* length of original instruction */
+	u8  replacementlen;	/* length of new instruction */
+	u8  padlen;		/* length of build-time padding */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+			struct instruction *orig, struct symbol *sym,
+			int cpuid, u8 orig_len, u8 repl_len, u8 pad_len)
+{
+	struct section *sec, *reloc_sec;
+	struct reloc *reloc;
+	Elf_Scn *s;
+	const int size = sizeof(struct alt_instr);
+	struct alt_instr *alt;
+
+	sec = find_section_by_name(elf, ".altinstructions");
+	if (!sec) {
+		sec = elf_create_section(elf, ".altinstructions",
+					 SHF_WRITE, size, 0);
+
+		if (!sec) {
+			WARN_ELF("elf_create_section");
+			return -1;
+		}
+
+		reloc_sec = elf_create_reloc_section(elf, sec, SHT_RELA);
+		if (!reloc_sec) {
+			WARN_ELF("elf_create_reloc_section");
+			return -1;
+		}
+	}
+
+	s = elf_getscn(elf->elf, sec->idx);
+	if (!s) {
+		WARN_ELF("elf_getscn");
+		return -1;
+	}
+
+	sec->data = elf_newdata(s);
+	if (!sec->data) {
+		WARN_ELF("elf_newdata");
+		return -1;
+	}
+
+	sec->data->d_size = size;
+	sec->data->d_align = 1;
+
+	alt = sec->data->d_buf = malloc(size);
+	if (!sec->data->d_buf) {
+		perror("malloc");
+		return -1;
+	}
+	memset(sec->data->d_buf, 0, size);
+
+	alt->cpuid = cpuid;
+	alt->instrlen = orig_len;
+	alt->replacementlen = repl_len;
+	alt->padlen = pad_len;
+
+	reloc = malloc(sizeof(*reloc));
+	if (!reloc) {
+		perror("malloc");
+		return -1;
+	}
+	memset(reloc, 0, sizeof(*reloc));
+
+	insn_to_reloc_sym_addend(orig->sec, orig->offset, reloc);
+	if (!reloc->sym) {
+		WARN_FUNC("alt: missing containing symbol",
+			  orig->sec, orig->offset);
+		return -1;
+	}
+
+	reloc->type = R_X86_64_PC32;
+	reloc->offset = sec->sh.sh_size;
+	reloc->sec = sec->reloc;
+	elf_add_reloc(elf, reloc);
+
+	reloc = malloc(sizeof(*reloc));
+	if (!reloc) {
+		perror("malloc");
+		return -1;
+	}
+	memset(reloc, 0, sizeof(*reloc));
+
+	reloc->sym = sym;
+	reloc->addend = 0;
+	reloc->type = R_X86_64_PC32;
+	reloc->offset = sec->sh.sh_size + 4;
+	reloc->sec = sec->reloc;
+	elf_add_reloc(elf, reloc);
+
+	sec->sh.sh_size += size;
+	sec->changed = true;
+
+	return 0;
+}
+
+#define X86_FEATURE_RETPOLINE                ( 7*32+12)
+
+int arch_rewrite_retpoline(struct objtool_file *file,
+			   struct instruction *insn,
+			   struct reloc *reloc)
+{
+	struct symbol *sym;
+	char name[32] = "";
+
+	if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+		return 0;
+
+	sprintf(name, "__x86_indirect_alt_%s_%s",
+		insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+		reloc->sym->name + 21);
+
+	sym = find_symbol_by_name(file->elf, name);
+	if (!sym) {
+		sym = elf_create_undef_symbol(file->elf, name);
+		if (!sym) {
+			WARN("elf_create_undef_symbol");
+			return -1;
+		}
+	}
+
+	elf_add_alternative(file->elf, insn, sym,
+			    ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5, 0);
+
+	return 0;
+}
+
 int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
 {
 	struct cfi_reg *cfa = &insn->cfi.cfa;



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/9] objtool: Correctly handle retpoline thunk calls
  2021-03-12 17:16 ` [PATCH 2/9] objtool: Correctly handle retpoline thunk calls Peter Zijlstra
@ 2021-03-16 21:19   ` Josh Poimboeuf
  0 siblings, 0 replies; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-16 21:19 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jgross, mbenes, linux-kernel

On Fri, Mar 12, 2021 at 06:16:15PM +0100, Peter Zijlstra wrote:
> Just like JMP handling, convert a direct CALL to a retpoline thunk
> into a retpoline safe indirect CALL.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  tools/objtool/check.c |   12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -953,6 +953,18 @@ static int add_call_destinations(struct
>  					  dest_off);
>  				return -1;
>  			}
> +
> +		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
> +			/*
> +			 * Retpoline calls are really dynamic calls in
> +			 * disguise, so convert them accodingly.

s/accodingly/accordingly/

-- 
Josh


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/9] objtool: Fix static_call list generation
  2021-03-12 17:16 ` [PATCH 4/9] objtool: Fix static_call list generation Peter Zijlstra
@ 2021-03-17  3:18   ` Josh Poimboeuf
  0 siblings, 0 replies; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-17  3:18 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jgross, mbenes, linux-kernel

On Fri, Mar 12, 2021 at 06:16:17PM +0100, Peter Zijlstra wrote:
> @@ -1701,6 +1706,9 @@ static int decode_sections(struct objtoo
>  	if (ret)
>  		return ret;
>  
> +	/*
> +	 * Must be before add_{jump_call}_desetination.
> +	 */

s/desetination/destination/

-- 
Josh


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-12 17:16 ` [PATCH 5/9] objtool: Rework rebuild_reloc logic Peter Zijlstra
@ 2021-03-17  3:34   ` Josh Poimboeuf
  2021-03-17  8:12     ` Peter Zijlstra
  0 siblings, 1 reply; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-17  3:34 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jgross, mbenes, linux-kernel

On Fri, Mar 12, 2021 at 06:16:18PM +0100, Peter Zijlstra wrote:
> --- a/tools/objtool/elf.c
> +++ b/tools/objtool/elf.c
> @@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, stru
>  
>  	list_add_tail(&reloc->list, &sec->reloc_list);
>  	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
> +
> +	sec->rereloc = true;
>  }

Can we just reuse sec->changed for this?  Something like this on top
(untested of course):

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index addcec88ac9f..b9cb74a54681 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -480,7 +480,7 @@ void elf_add_reloc(struct elf *elf, struct reloc *reloc)
 	list_add_tail(&reloc->list, &sec->reloc_list);
 	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
 
-	sec->rereloc = true;
+	sec->changed = true;
 }
 
 static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
@@ -882,9 +882,6 @@ int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
 	struct reloc *reloc;
 	int nr;
 
-	sec->changed = true;
-	elf->changed = true;
-
 	nr = 0;
 	list_for_each_entry(reloc, &sec->reloc_list, list)
 		nr++;
@@ -894,8 +891,6 @@ int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
 	case SHT_RELA: return elf_rebuild_rela_reloc_section(sec, nr);
 	default:       return -1;
 	}
-
-	sec->rereloc = false;
 }
 
 int elf_write_insn(struct elf *elf, struct section *sec,
@@ -950,14 +945,15 @@ int elf_write(struct elf *elf)
 	struct section *sec;
 	Elf_Scn *s;
 
-	list_for_each_entry(sec, &elf->sections, list) {
-		if (sec->reloc && sec->reloc->rereloc)
-			elf_rebuild_reloc_section(elf, sec->reloc);
-	}
-
-	/* Update section headers for changed sections: */
+	/* Update changed relocation sections and section headers: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {
+			if (sec->reloc &&
+			    elf_rebuild_reloc_section(elf, sec->reloc)) {
+				WARN_ELF("elf_rebuild_reloc_section");
+				return -1;
+			}
+
 			s = elf_getscn(elf->elf, sec->idx);
 			if (!s) {
 				WARN_ELF("elf_getscn");
@@ -969,6 +965,7 @@ int elf_write(struct elf *elf)
 			}
 
 			sec->changed = false;
+			elf->changed = true;
 		}
 	}
 
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 9fdd4c5f9f32..e6890cc70a25 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -39,7 +39,7 @@ struct section {
 	char *name;
 	int idx;
 	unsigned int len;
-	bool changed, text, rodata, noinstr, rereloc;
+	bool changed, text, rodata, noinstr;
 };
 
 struct symbol {


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-17  3:34   ` Josh Poimboeuf
@ 2021-03-17  8:12     ` Peter Zijlstra
  2021-03-18  0:49       ` Josh Poimboeuf
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-17  8:12 UTC (permalink / raw)
  To: Josh Poimboeuf; +Cc: x86, jgross, mbenes, linux-kernel

On Tue, Mar 16, 2021 at 10:34:17PM -0500, Josh Poimboeuf wrote:
> On Fri, Mar 12, 2021 at 06:16:18PM +0100, Peter Zijlstra wrote:
> > --- a/tools/objtool/elf.c
> > +++ b/tools/objtool/elf.c
> > @@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, stru
> >  
> >  	list_add_tail(&reloc->list, &sec->reloc_list);
> >  	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
> > +
> > +	sec->rereloc = true;
> >  }
> 
> Can we just reuse sec->changed for this?  Something like this on top
> (untested of course):

I think my worry was that we'd dirty too much and slow down the write,
but I haven't done any actual performance measurements on this.

Let me do a few runs and see if it matters at all.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/9] objtool: Add elf_create_undef_symbol()
  2021-03-12 17:16 ` [PATCH 6/9] objtool: Add elf_create_undef_symbol() Peter Zijlstra
@ 2021-03-17 13:52   ` Miroslav Benes
  2021-03-17 14:13     ` Peter Zijlstra
  0 siblings, 1 reply; 28+ messages in thread
From: Miroslav Benes @ 2021-03-17 13:52 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jpoimboe, jgross, linux-kernel

[ also correcting my e-mail address ]

On Fri, 12 Mar 2021, Peter Zijlstra wrote:

Just a remark regarding SHN_XINDEX...

> +static bool elf_symbol_add(struct elf *elf, struct symbol *sym, Elf32_Word shndx)
> +{
> +	struct list_head *entry;
> +	struct rb_node *pnode;
> +
> +	sym->type = GELF_ST_TYPE(sym->sym.st_info);
> +	sym->bind = GELF_ST_BIND(sym->sym.st_info);
> +
> +	if ((sym->sym.st_shndx > SHN_UNDEF &&
> +	     sym->sym.st_shndx < SHN_LORESERVE) ||
> +	    (shndx != SHN_XINDEX && sym->sym.st_shndx == SHN_XINDEX)) {
> +		if (sym->sym.st_shndx != SHN_XINDEX)
> +			shndx = sym->sym.st_shndx;
> +
> +		sym->sec = find_section_by_index(elf, shndx);
> +		if (!sym->sec) {
> +			WARN("couldn't find section for symbol %s",
> +			     sym->name);
> +			return false;
> +		}

...

> @@ -366,47 +414,11 @@ static int read_symbols(struct elf *elf)
>  			goto err;
>  		}
>  
> -		sym->type = GELF_ST_TYPE(sym->sym.st_info);
> -		sym->bind = GELF_ST_BIND(sym->sym.st_info);
> +		if (!shndx_data)
> +			shndx = SHN_XINDEX;
>  
> -		if ((sym->sym.st_shndx > SHN_UNDEF &&
> -		     sym->sym.st_shndx < SHN_LORESERVE) ||
> -		    (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
> -			if (sym->sym.st_shndx != SHN_XINDEX)
> -				shndx = sym->sym.st_shndx;
> -
> -			sym->sec = find_section_by_index(elf, shndx);
> -			if (!sym->sec) {
> -				WARN("couldn't find section for symbol %s",
> -				     sym->name);
> -				goto err;

...

> +	if (!elf_symbol_add(elf, sym, SHN_XINDEX)) {
> +		WARN("elf_symbol_add");
> +		return NULL;
> +	}

SHN_XINDEX means that the extended section index is used. Above you seem 
to use it in the opposite sense too (assigning to shndx when shndx_data is 
NULL). While it makes the code easier to handle, it is a bit confusing 
(and maybe I am just confused now). Could you add a comment about that, 
please? elf_symbol_add() seems like a good place.

Miroslav

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/9] objtool: Add elf_create_undef_symbol()
  2021-03-17 13:52   ` Miroslav Benes
@ 2021-03-17 14:13     ` Peter Zijlstra
  2021-03-17 14:39       ` Miroslav Benes
  2021-03-18  0:46       ` Josh Poimboeuf
  0 siblings, 2 replies; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-17 14:13 UTC (permalink / raw)
  To: Miroslav Benes; +Cc: x86, jpoimboe, jgross, linux-kernel

On Wed, Mar 17, 2021 at 02:52:23PM +0100, Miroslav Benes wrote:

> > +	if (!elf_symbol_add(elf, sym, SHN_XINDEX)) {
> > +		WARN("elf_symbol_add");
> > +		return NULL;
> > +	}
> 
> SHN_XINDEX means that the extended section index is used. Above you seem 
> to use it in the opposite sense too (assigning to shndx when shndx_data is 
> NULL). While it makes the code easier to handle, it is a bit confusing 
> (and maybe I am just confused now). Could you add a comment about that, 
> please? elf_symbol_add() seems like a good place.

Yes, that was a horrible thing to do :/ And you understood it right.

Looking at it again, I'm not sure it is actually correct tho; shouldn't
elf_create_undef_symbol() also look at gelf_getsymshndx() of symtab ?

What toolchain generates these extended sections and how? That is, how
do I test this crud..



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/9] objtool: Add elf_create_undef_symbol()
  2021-03-17 14:13     ` Peter Zijlstra
@ 2021-03-17 14:39       ` Miroslav Benes
  2021-03-17 15:08         ` Sami Tolvanen
  2021-03-18  0:46       ` Josh Poimboeuf
  1 sibling, 1 reply; 28+ messages in thread
From: Miroslav Benes @ 2021-03-17 14:39 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jpoimboe, jgross, linux-kernel, samitolvanen

On Wed, 17 Mar 2021, Peter Zijlstra wrote:

> On Wed, Mar 17, 2021 at 02:52:23PM +0100, Miroslav Benes wrote:
> 
> > > +	if (!elf_symbol_add(elf, sym, SHN_XINDEX)) {
> > > +		WARN("elf_symbol_add");
> > > +		return NULL;
> > > +	}
> > 
> > SHN_XINDEX means that the extended section index is used. Above you seem 
> > to use it in the opposite sense too (assigning to shndx when shndx_data is 
> > NULL). While it makes the code easier to handle, it is a bit confusing 
> > (and maybe I am just confused now). Could you add a comment about that, 
> > please? elf_symbol_add() seems like a good place.
> 
> Yes, that was a horrible thing to do :/ And you understood it right.
> 
> Looking at it again, I'm not sure it is actually correct tho; shouldn't
> elf_create_undef_symbol() also look at gelf_getsymshndx() of symtab ?

Probably yes.
 
> What toolchain generates these extended sections and how? That is, how
> do I test this crud..

Sami might know.

Miroslav

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/9] objtool: Add elf_create_undef_symbol()
  2021-03-17 14:39       ` Miroslav Benes
@ 2021-03-17 15:08         ` Sami Tolvanen
  0 siblings, 0 replies; 28+ messages in thread
From: Sami Tolvanen @ 2021-03-17 15:08 UTC (permalink / raw)
  To: Miroslav Benes; +Cc: Peter Zijlstra, X86 ML, Josh Poimboeuf, jgross, LKML

On Wed, Mar 17, 2021 at 7:40 AM Miroslav Benes <mbenes@suse.cz> wrote:
>
> On Wed, 17 Mar 2021, Peter Zijlstra wrote:
>
> > On Wed, Mar 17, 2021 at 02:52:23PM +0100, Miroslav Benes wrote:
> >
> > > > + if (!elf_symbol_add(elf, sym, SHN_XINDEX)) {
> > > > +         WARN("elf_symbol_add");
> > > > +         return NULL;
> > > > + }
> > >
> > > SHN_XINDEX means that the extended section index is used. Above you seem
> > > to use it in the opposite sense too (assigning to shndx when shndx_data is
> > > NULL). While it makes the code easier to handle, it is a bit confusing
> > > (and maybe I am just confused now). Could you add a comment about that,
> > > please? elf_symbol_add() seems like a good place.
> >
> > Yes, that was a horrible thing to do :/ And you understood it right.
> >
> > Looking at it again, I'm not sure it is actually correct tho; shouldn't
> > elf_create_undef_symbol() also look at gelf_getsymshndx() of symtab ?
>
> Probably yes.
>
> > What toolchain generates these extended sections and how? That is, how
> > do I test this crud..
>
> Sami might know.

Clang generates these with LTO for vmlinux.o, but I'm guessing gcc
will do the same with -ffunction-sections -fdata-sections.

Sami

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/9] objtool: Add elf_create_undef_symbol()
  2021-03-17 14:13     ` Peter Zijlstra
  2021-03-17 14:39       ` Miroslav Benes
@ 2021-03-18  0:46       ` Josh Poimboeuf
  2021-03-18  7:56         ` Peter Zijlstra
  1 sibling, 1 reply; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-18  0:46 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Miroslav Benes, x86, jgross, linux-kernel

On Wed, Mar 17, 2021 at 03:13:43PM +0100, Peter Zijlstra wrote:
> On Wed, Mar 17, 2021 at 02:52:23PM +0100, Miroslav Benes wrote:
> 
> > > +	if (!elf_symbol_add(elf, sym, SHN_XINDEX)) {
> > > +		WARN("elf_symbol_add");
> > > +		return NULL;
> > > +	}
> > 
> > SHN_XINDEX means that the extended section index is used. Above you seem 
> > to use it in the opposite sense too (assigning to shndx when shndx_data is 
> > NULL). While it makes the code easier to handle, it is a bit confusing 
> > (and maybe I am just confused now). Could you add a comment about that, 
> > please? elf_symbol_add() seems like a good place.
> 
> Yes, that was a horrible thing to do :/ And you understood it right.
> 
> Looking at it again, I'm not sure it is actually correct tho; shouldn't
> elf_create_undef_symbol() also look at gelf_getsymshndx() of symtab ?
> 
> What toolchain generates these extended sections and how? That is, how
> do I test this crud..

SHN_XINDEX is basically a special-case extension to original ELF for
supporting more than 64k sections.
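
A minimal sketch of how the extended index gets resolved when reading
symbols (simplified from read_symbols() above; shndx_data is the
.symtab_shndx section's data, NULL when that section is absent):

	GElf_Sym gsym;
	Elf32_Word xshndx, shndx;

	if (!gelf_getsymshndx(symtab->data, shndx_data, i, &gsym, &xshndx))
		return -1;

	if (shndx_data && gsym.st_shndx == SHN_XINDEX)
		shndx = xshndx;		/* real index stored in .symtab_shndx */
	else
		shndx = gsym.st_shndx;	/* fits in the 16-bit st_shndx field */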

-- 
Josh


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-17  8:12     ` Peter Zijlstra
@ 2021-03-18  0:49       ` Josh Poimboeuf
  2021-03-18 12:57         ` Peter Zijlstra
  0 siblings, 1 reply; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-18  0:49 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jgross, mbenes, linux-kernel

On Wed, Mar 17, 2021 at 09:12:15AM +0100, Peter Zijlstra wrote:
> On Tue, Mar 16, 2021 at 10:34:17PM -0500, Josh Poimboeuf wrote:
> > On Fri, Mar 12, 2021 at 06:16:18PM +0100, Peter Zijlstra wrote:
> > > --- a/tools/objtool/elf.c
> > > +++ b/tools/objtool/elf.c
> > > @@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, stru
> > >  
> > >  	list_add_tail(&reloc->list, &sec->reloc_list);
> > >  	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
> > > +
> > > +	sec->rereloc = true;
> > >  }
> > 
> > Can we just reuse sec->changed for this?  Something like this on top
> > (untested of course):
> 
> I think my worry was that we'd dirty too much and slow down the write,
> but I haven't done any actual performance measurements on this.

Really?  I thought my proposal was purely aesthetic, no functional
change, but my brain is toasty this week due to other distractions so
who knows.

> Let me do a few runs and see if it matters at all.

-- 
Josh


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/9] objtool: Add elf_create_undef_symbol()
  2021-03-18  0:46       ` Josh Poimboeuf
@ 2021-03-18  7:56         ` Peter Zijlstra
  0 siblings, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-18  7:56 UTC (permalink / raw)
  To: Josh Poimboeuf; +Cc: Miroslav Benes, x86, jgross, linux-kernel

On Wed, Mar 17, 2021 at 07:46:14PM -0500, Josh Poimboeuf wrote:
> On Wed, Mar 17, 2021 at 03:13:43PM +0100, Peter Zijlstra wrote:
> > On Wed, Mar 17, 2021 at 02:52:23PM +0100, Miroslav Benes wrote:
> > 
> > > > +	if (!elf_symbol_add(elf, sym, SHN_XINDEX)) {
> > > > +		WARN("elf_symbol_add");
> > > > +		return NULL;
> > > > +	}
> > > 
> > > SHN_XINDEX means that the extended section index is used. Above you seem 
> > > to use it in the opposite sense too (assigning to shndx when shndx_data is 
> > > NULL). While it makes the code easier to handle, it is a bit confusing 
> > > (and maybe I am just confused now). Could you add a comment about that, 
> > > please? elf_symbol_add() seems like a good place.
> > 
> > Yes, that was a horrible thing to do :/ And you understood it right.
> > 
> > Looking at it again, I'm not sure it is actually correct tho; shouldn't
> > elf_create_undef_symbol() also look at gelf_getsymshndx() of symtab ?
> > 
> > What toolchain generates these extended sections and how? That is, how
> > do I test this crud..
> 
> SHN_XINDEX is basically a special-case extension to original ELF for
> supporting more than 64k sections.

Yeah, I figured it all out again last night; the patch as presented is
hideous but correct(ish). But I've rewritten it and it's all much better
now. Will post later.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-18  0:49       ` Josh Poimboeuf
@ 2021-03-18 12:57         ` Peter Zijlstra
  2021-03-18 16:36           ` Josh Poimboeuf
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-18 12:57 UTC (permalink / raw)
  To: Josh Poimboeuf; +Cc: x86, jgross, mbenes, linux-kernel

On Wed, Mar 17, 2021 at 07:49:17PM -0500, Josh Poimboeuf wrote:
> On Wed, Mar 17, 2021 at 09:12:15AM +0100, Peter Zijlstra wrote:
> > On Tue, Mar 16, 2021 at 10:34:17PM -0500, Josh Poimboeuf wrote:
> > > On Fri, Mar 12, 2021 at 06:16:18PM +0100, Peter Zijlstra wrote:
> > > > --- a/tools/objtool/elf.c
> > > > +++ b/tools/objtool/elf.c
> > > > @@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, stru
> > > >  
> > > >  	list_add_tail(&reloc->list, &sec->reloc_list);
> > > >  	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
> > > > +
> > > > +	sec->rereloc = true;
> > > >  }
> > > 
> > > Can we just reuse sec->changed for this?  Something like this on top
> > > (untested of course):
> > 
> > I think my worry was that we'd dirty too much and slow down the write,
> > but I haven't done any actual performance measurements on this.
> 
> Really?  I thought my proposal was purely aesthetic, no functional
> change, but my brain is toasty this week due to other distractions so
> who knows.

I was thinking you could get a section changed without touching
relocations, but while that is theoretically possible, it is exceedingly
unlikely (and objtool doesn't do that).

Because if entries have relocations, then adding an entry will also add
relocations etc..

pre: 79.269 +- 0.104 seconds time elapsed  ( +-  0.13% )
post: 79.0604 +- 0.0441 seconds time elapsed  ( +-  0.06% )
fini: 79.2995 +- 0.0448 seconds time elapsed  ( +-  0.06% )

is what I get for kbuild x86_64-defconfig-ish build times with the
various patches applied. Which is all noise afaict. I'll fold your
thing. Less is more etc..

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-18 12:57         ` Peter Zijlstra
@ 2021-03-18 16:36           ` Josh Poimboeuf
  2021-03-18 17:04             ` Peter Zijlstra
  0 siblings, 1 reply; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-18 16:36 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jgross, mbenes, linux-kernel

On Thu, Mar 18, 2021 at 01:57:03PM +0100, Peter Zijlstra wrote:
> On Wed, Mar 17, 2021 at 07:49:17PM -0500, Josh Poimboeuf wrote:
> > On Wed, Mar 17, 2021 at 09:12:15AM +0100, Peter Zijlstra wrote:
> > > On Tue, Mar 16, 2021 at 10:34:17PM -0500, Josh Poimboeuf wrote:
> > > > On Fri, Mar 12, 2021 at 06:16:18PM +0100, Peter Zijlstra wrote:
> > > > > --- a/tools/objtool/elf.c
> > > > > +++ b/tools/objtool/elf.c
> > > > > @@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, stru
> > > > >  
> > > > >  	list_add_tail(&reloc->list, &sec->reloc_list);
> > > > >  	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
> > > > > +
> > > > > +	sec->rereloc = true;
> > > > >  }
> > > > 
> > > > Can we just reuse sec->changed for this?  Something like this on top
> > > > (untested of course):
> > > 
> > > I think my worry was that we'd dirty too much and slow down the write,
> > > but I haven't done any actual performance measurements on this.
> > 
> > Really?  I thought my proposal was purely aesthetic, no functional
> > change, but my brain is toasty this week due to other distractions so
> > who knows.
> 
> I was thinking you could get a section changed without touching
> relocations, but while that is theoretically possible, it is exceedingly
> unlikely (and objtool doesn't do that).

Hm?  This is a *relocation* section, not a normal one.  So by
definition, it only changes when its relocations change.

-- 
Josh


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-18 16:36           ` Josh Poimboeuf
@ 2021-03-18 17:04             ` Peter Zijlstra
  2021-03-18 17:38               ` Josh Poimboeuf
  0 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-18 17:04 UTC (permalink / raw)
  To: Josh Poimboeuf; +Cc: x86, jgross, mbenes, linux-kernel

On Thu, Mar 18, 2021 at 11:36:40AM -0500, Josh Poimboeuf wrote:
> > I was thinking you could get a section changed without touching
> > relocations, but while that is theoretically possible, it is exceedingly
> > unlikely (and objtool doesn't do that).
> 
> Hm?  This is a *relocation* section, not a normal one.  So by
> definition, it only changes when its relocations change.

The way I read this code:

 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {
+			if (sec->reloc &&
+			    elf_rebuild_reloc_section(elf, sec->reloc)) {
+				WARN_ELF("elf_rebuild_reloc_section");
+				return -1;
+			}

is that we iterate the regular sections (which could be dirtied because
we changed some data), and if that section has a relocation section, we
rebuild that for good measure (even though it might not have altered
relocations).

Or am I just totally confused ?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-18 17:04             ` Peter Zijlstra
@ 2021-03-18 17:38               ` Josh Poimboeuf
  2021-03-19  0:19                 ` Josh Poimboeuf
  2021-03-19  9:22                 ` Peter Zijlstra
  0 siblings, 2 replies; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-18 17:38 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jgross, mbenes, linux-kernel

On Thu, Mar 18, 2021 at 06:04:25PM +0100, Peter Zijlstra wrote:
> On Thu, Mar 18, 2021 at 11:36:40AM -0500, Josh Poimboeuf wrote:
> > > I was thinking you could get a section changed without touching
> > > relocations, but while that is theoretically possible, it is exceedingly
> > > unlikely (and objtool doesn't do that).
> > 
> > Hm?  This is a *relocation* section, not a normal one.  So by
> > definition, it only changes when its relocations change.
> 
> The way I read this code:
> 
>  	list_for_each_entry(sec, &elf->sections, list) {
>  		if (sec->changed) {
> +			if (sec->reloc &&
> +			    elf_rebuild_reloc_section(elf, sec->reloc)) {
> +				WARN_ELF("elf_rebuild_reloc_section");
> +				return -1;
> +			}
> 
> is that we iterate the regular sections (which could be dirtied because
> we changed some data), and if that section has a relocation section, we
> rebuild that for good measure (even though it might not have altered
> relocations).
> 
> Or am I just totally confused ?

Ah, you're right.  I'm the one that's confused.  I guess I was also
confused when I wrote that hunk, but it just happens to work anyway.

It would be cleaner to do something like

			if (is_reloc_sec(sec) &&
			    elf_rebuild_reloc_section(elf, sec)) {

so we process the changed reloc section directly, instead of relying on
the (most likely) fact that the corresponding text section also changed.

-- 
Josh


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-18 17:38               ` Josh Poimboeuf
@ 2021-03-19  0:19                 ` Josh Poimboeuf
  2021-03-19  9:22                 ` Peter Zijlstra
  1 sibling, 0 replies; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-19  0:19 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jgross, mbenes, linux-kernel

On Thu, Mar 18, 2021 at 12:38:42PM -0500, Josh Poimboeuf wrote:
> On Thu, Mar 18, 2021 at 06:04:25PM +0100, Peter Zijlstra wrote:
> > On Thu, Mar 18, 2021 at 11:36:40AM -0500, Josh Poimboeuf wrote:
> > > > I was thinking you could get a section changed without touching
> > > > relocations, but while that is theoretically possible, it is exceedingly
> > > > unlikely (and objtool doesn't do that).
> > > 
> > > Hm?  This is a *relocation* section, not a normal one.  So by
> > > definition, it only changes when its relocations change.
> > 
> > The way I read this code:
> > 
> >  	list_for_each_entry(sec, &elf->sections, list) {
> >  		if (sec->changed) {
> > +			if (sec->reloc &&
> > +			    elf_rebuild_reloc_section(elf, sec->reloc)) {
> > +				WARN_ELF("elf_rebuild_reloc_section");
> > +				return -1;
> > +			}
> > 
> > is that we iterate the regular sections (which could be dirtied because
> > we changed some data), and if that section has a relocation section, we
> > rebuild that for good measure (even though it might not have altered
> > relocations).
> > 
> > Or am I just totally confused ?
> 
> Ah, you're right.  I'm the one that's confused.  I guess I was also
> confused when I wrote that hunk, but it just happens to work anyway.
> 
> It would be cleaner to do something like
> 
> 			if (is_reloc_sec(sec) &&
> 			    elf_rebuild_reloc_section(elf, sec)) {
> 
> so we process the changed reloc section directly, instead of relying on
> the (most likely) fact that the corresponding text section also changed.

i.e., in actual code:

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 66c49c2e20a6..3b3d19a5e626 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -948,8 +948,7 @@ int elf_write(struct elf *elf)
 	/* Update changed relocation sections and section headers: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {
-			if (sec->reloc &&
-			    elf_rebuild_reloc_section(elf, sec->reloc)) {
+			if (sec->base && elf_rebuild_reloc_section(elf, sec)) {
 				WARN_ELF("elf_rebuild_reloc_section");
 				return -1;
 			}


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-18 17:38               ` Josh Poimboeuf
  2021-03-19  0:19                 ` Josh Poimboeuf
@ 2021-03-19  9:22                 ` Peter Zijlstra
  2021-03-19 15:15                   ` Josh Poimboeuf
  1 sibling, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2021-03-19  9:22 UTC (permalink / raw)
  To: Josh Poimboeuf; +Cc: x86, jgross, mbenes, linux-kernel

On Thu, Mar 18, 2021 at 12:38:42PM -0500, Josh Poimboeuf wrote:
> On Thu, Mar 18, 2021 at 06:04:25PM +0100, Peter Zijlstra wrote:
> > On Thu, Mar 18, 2021 at 11:36:40AM -0500, Josh Poimboeuf wrote:
> > > > I was thinking you could get a section changed without touching
> > > > relocations, but while that is theoretically possible, it is exceedingly
> > > > unlikely (and objtool doesn't do that).
> > > 
> > > Hm?  This is a *relocation* section, not a normal one.  So by
> > > definition, it only changes when its relocations change.
> > 
> > The way I read this code:
> > 
> >  	list_for_each_entry(sec, &elf->sections, list) {
> >  		if (sec->changed) {
> > +			if (sec->reloc &&
> > +			    elf_rebuild_reloc_section(elf, sec->reloc)) {
> > +				WARN_ELF("elf_rebuild_reloc_section");
> > +				return -1;
> > +			}
> > 
> > is that we iterate the regular sections (which could be dirtied because
> > we changed some data), and if that section has a relocation section, we
> > rebuild that for good measure (even though it might not have altered
> > relocations).
> > 
> > Or am I just totally confused ?
> 
> Ah, you're right.  I'm the one that's confused.  I guess I was also
> confused when I wrote that hunk, but it just happens to work anyway.
> 
> It would be cleaner to do something like
> 
> 			if (is_reloc_sec(sec) &&
> 			    elf_rebuild_reloc_section(elf, sec)) {
> 
> so we process the changed reloc section directly, instead of relying on
> the (most likely) fact that the corresponding text section also changed.

Indeed. Done.

--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -814,6 +814,11 @@ struct section *elf_create_reloc_section
 	}
 }
 
+static inline bool is_reloc_section(struct section *reloc)
+{
+	return reloc->base && reloc->base->reloc == reloc;
+}
+
 static int elf_rebuild_rel_reloc_section(struct section *sec, int nr)
 {
 	struct reloc *reloc;
@@ -948,7 +953,7 @@ int elf_write(struct elf *elf)
 	/* Update changed relocation sections and section headers: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {
-			if (sec->reloc &&
+			if (is_reloc_section(sec) &&
 			    elf_rebuild_reloc_section(elf, sec->reloc)) {
 				WARN_ELF("elf_rebuild_reloc_section");
 				return -1;

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/9] objtool: Rework rebuild_reloc logic
  2021-03-19  9:22                 ` Peter Zijlstra
@ 2021-03-19 15:15                   ` Josh Poimboeuf
  0 siblings, 0 replies; 28+ messages in thread
From: Josh Poimboeuf @ 2021-03-19 15:15 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jgross, mbenes, linux-kernel

On Fri, Mar 19, 2021 at 10:22:54AM +0100, Peter Zijlstra wrote:
> @@ -814,6 +814,11 @@ struct section *elf_create_reloc_section
>  	}
>  }
>  
> +static inline bool is_reloc_section(struct section *reloc)
> +{
> +	return reloc->base && reloc->base->reloc == reloc;
> +}

I believe the 2nd condition will always be true, so it can just be
'return reloc->base'.
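
i.e. (untested, just the suggestion spelled out against the hunk above):

static inline bool is_reloc_section(struct section *reloc)
{
	return reloc->base;
}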

-- 
Josh


^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2021-03-19 15:16 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-12 17:16 [PATCH 0/9] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
2021-03-12 17:16 ` [PATCH 1/9] x86/retpoline: Simplify retpolines Peter Zijlstra
2021-03-12 17:16 ` [PATCH 2/9] objtool: Correctly handle retpoline thunk calls Peter Zijlstra
2021-03-16 21:19   ` Josh Poimboeuf
2021-03-12 17:16 ` [PATCH 3/9] objtool: Per arch retpoline naming Peter Zijlstra
2021-03-12 17:16 ` [PATCH 4/9] objtool: Fix static_call list generation Peter Zijlstra
2021-03-17  3:18   ` Josh Poimboeuf
2021-03-12 17:16 ` [PATCH 5/9] objtool: Rework rebuild_reloc logic Peter Zijlstra
2021-03-17  3:34   ` Josh Poimboeuf
2021-03-17  8:12     ` Peter Zijlstra
2021-03-18  0:49       ` Josh Poimboeuf
2021-03-18 12:57         ` Peter Zijlstra
2021-03-18 16:36           ` Josh Poimboeuf
2021-03-18 17:04             ` Peter Zijlstra
2021-03-18 17:38               ` Josh Poimboeuf
2021-03-19  0:19                 ` Josh Poimboeuf
2021-03-19  9:22                 ` Peter Zijlstra
2021-03-19 15:15                   ` Josh Poimboeuf
2021-03-12 17:16 ` [PATCH 6/9] objtool: Add elf_create_undef_symbol() Peter Zijlstra
2021-03-17 13:52   ` Miroslav Benes
2021-03-17 14:13     ` Peter Zijlstra
2021-03-17 14:39       ` Miroslav Benes
2021-03-17 15:08         ` Sami Tolvanen
2021-03-18  0:46       ` Josh Poimboeuf
2021-03-18  7:56         ` Peter Zijlstra
2021-03-12 17:16 ` [PATCH 7/9] objtool: Allow archs to rewrite retpolines Peter Zijlstra
2021-03-12 17:16 ` [PATCH 8/9] objtool: Skip magical retpoline .altinstr_replacement Peter Zijlstra
2021-03-12 17:16 ` [PATCH 9/9] objtool,x86: Rewrite retpoline thunk calls Peter Zijlstra
