linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels
@ 2021-05-06 19:33 Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 01/13] objtool: Rewrite hashtable sizing Peter Zijlstra
                   ` (12 more replies)
  0 siblings, 13 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:33 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Hi,

I figured that since objtool is rewriting things anyway, we can also use it to
do the variable sized jump_labels.

Previous attempts are here:

  https://lore.kernel.org/lkml/20191007084443.79370128.1@infradead.org/
  https://lore.kernel.org/lkml/1318007374.4729.58.camel@gandalf.stny.rr.com/


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
@ 2021-05-06 19:33 ` Peter Zijlstra
  2021-05-12 10:41   ` Peter Zijlstra
                     ` (2 more replies)
  2021-05-06 19:33 ` [PATCH 02/13] x86,objtool: Dont exclude arch/x86/realmode/ Peter Zijlstra
                   ` (11 subsequent siblings)
  12 siblings, 3 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:33 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Currently objtool has 5 hashtables and sizes them 16 or 20 bits
depending on the --vmlinux argument.

However, a single size doesn't really work well for the 5 tables,
which among them, cover 3 different uses. Also, while vmlinux is
larger, there is still a very wide difference between a defconfig and
allyesconfig build, which again isn't optimally covered by a single
size.

Another aspect is the cost of elf_hash_init(), which for large tables
dominates the runtime for small input files. It turns out that all it
does is assign NULL, something that is required when using malloc().
However, when we allocate memory using mmap(), we're guaranteed to get
zero filled pages.

Therefore, rewrite the whole thing to:

 1) use more dynamic sized tables, depending on the input file,
 2) avoid the need for elf_hash_init() entirely by using mmap().

This speeds up a regular kernel build (100s to 98s for
x86_64-defconfig), and potentially dramatically speeds up vmlinux
processing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/elf.c                 |   95 +++++++++++++++++++++++-------------
 tools/objtool/include/objtool/elf.h |   17 ++++--
 2 files changed, 73 insertions(+), 39 deletions(-)

--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -9,6 +9,7 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -27,21 +28,27 @@ static inline u32 str_hash(const char *s
 	return jhash(str, strlen(str), 0);
 }
 
-static inline int elf_hash_bits(void)
-{
-	return vmlinux ? ELF_HASH_BITS : 16;
-}
+#define __elf_table(name)	(elf->name##_hash)
+#define __elf_bits(name)	(elf->name##_bits)
 
-#define elf_hash_add(hashtable, node, key) \
-	hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+#define elf_hash_add(name, node, key) \
+	hlist_add_head(node, &__elf_table(name)[hash_min(key, __elf_bits(name))])
 
-static void elf_hash_init(struct hlist_head *table)
-{
-	__hash_init(table, 1U << elf_hash_bits());
-}
+#define elf_hash_for_each_possible(name, obj, member, key) \
+	hlist_for_each_entry(obj, &__elf_table(name)[hash_min(key, __elf_bits(name))], member)
 
-#define elf_hash_for_each_possible(name, obj, member, key)			\
-	hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+#define elf_alloc_hash(name, size) \
+({ \
+	__elf_bits(name) = max(10, ilog2(size)); \
+	__elf_table(name) = mmap(NULL, sizeof(struct hlist_head) << __elf_bits(name), \
+				 PROT_READ|PROT_WRITE, \
+				 MAP_PRIVATE|MAP_ANON, -1, 0); \
+	if (__elf_table(name) == (void *)-1L) { \
+		WARN("mmap fail " #name); \
+		__elf_table(name) = NULL; \
+	} \
+	__elf_table(name); \
+})
 
 static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b)
 {
@@ -80,9 +87,10 @@ struct section *find_section_by_name(con
 {
 	struct section *sec;
 
-	elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+	elf_hash_for_each_possible(section_name, sec, name_hash, str_hash(name)) {
 		if (!strcmp(sec->name, name))
 			return sec;
+	}
 
 	return NULL;
 }
@@ -92,9 +100,10 @@ static struct section *find_section_by_i
 {
 	struct section *sec;
 
-	elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
+	elf_hash_for_each_possible(section, sec, hash, idx) {
 		if (sec->idx == idx)
 			return sec;
+	}
 
 	return NULL;
 }
@@ -103,9 +112,10 @@ static struct symbol *find_symbol_by_ind
 {
 	struct symbol *sym;
 
-	elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+	elf_hash_for_each_possible(symbol, sym, hash, idx) {
 		if (sym->idx == idx)
 			return sym;
+	}
 
 	return NULL;
 }
@@ -170,9 +180,10 @@ struct symbol *find_symbol_by_name(const
 {
 	struct symbol *sym;
 
-	elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+	elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
 		if (!strcmp(sym->name, name))
 			return sym;
+	}
 
 	return NULL;
 }
@@ -189,8 +200,8 @@ struct reloc *find_reloc_by_dest_range(c
 	sec = sec->reloc;
 
 	for_offset_range(o, offset, offset + len) {
-		elf_hash_for_each_possible(elf->reloc_hash, reloc, hash,
-				       sec_offset_hash(sec, o)) {
+		elf_hash_for_each_possible(reloc, reloc, hash,
+					   sec_offset_hash(sec, o)) {
 			if (reloc->sec != sec)
 				continue;
 
@@ -228,6 +239,10 @@ static int read_sections(struct elf *elf
 		return -1;
 	}
 
+	if (!elf_alloc_hash(section, sections_nr) ||
+	    !elf_alloc_hash(section_name, sections_nr))
+		return -1;
+
 	for (i = 0; i < sections_nr; i++) {
 		sec = malloc(sizeof(*sec));
 		if (!sec) {
@@ -274,12 +289,14 @@ static int read_sections(struct elf *elf
 		sec->len = sec->sh.sh_size;
 
 		list_add_tail(&sec->list, &elf->sections);
-		elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
-		elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+		elf_hash_add(section, &sec->hash, sec->idx);
+		elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
 	}
 
-	if (stats)
+	if (stats) {
 		printf("nr_sections: %lu\n", (unsigned long)sections_nr);
+		printf("section_bits: %d\n", elf->section_bits);
+	}
 
 	/* sanity check, one more call to elf_nextscn() should return NULL */
 	if (elf_nextscn(elf->elf, s)) {
@@ -308,8 +325,8 @@ static void elf_add_symbol(struct elf *e
 	else
 		entry = &sym->sec->symbol_list;
 	list_add(&sym->list, entry);
-	elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
-	elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+	elf_hash_add(symbol, &sym->hash, sym->idx);
+	elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name));
 
 	/*
 	 * Don't store empty STT_NOTYPE symbols in the rbtree.  They
@@ -343,6 +360,10 @@ static int read_symbols(struct elf *elf)
 
 	symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
 
+	if (!elf_alloc_hash(symbol, symbols_nr) ||
+	    !elf_alloc_hash(symbol_name, symbols_nr))
+		return -1;
+
 	for (i = 0; i < symbols_nr; i++) {
 		sym = malloc(sizeof(*sym));
 		if (!sym) {
@@ -389,8 +410,10 @@ static int read_symbols(struct elf *elf)
 		elf_add_symbol(elf, sym);
 	}
 
-	if (stats)
+	if (stats) {
 		printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
+		printf("symbol_bits: %d\n", elf->symbol_bits);
+	}
 
 	/* Create parent/child links for any cold subfunctions */
 	list_for_each_entry(sec, &elf->sections, list) {
@@ -479,7 +502,7 @@ int elf_add_reloc(struct elf *elf, struc
 	reloc->addend = addend;
 
 	list_add_tail(&reloc->list, &sec->reloc->reloc_list);
-	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+	elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
 
 	sec->reloc->changed = true;
 
@@ -556,6 +579,15 @@ static int read_relocs(struct elf *elf)
 	unsigned int symndx;
 	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
 
+	sec = find_section_by_name(elf, ".text");
+	if (!sec) {
+		WARN("no .text");
+		return -1;
+	}
+
+	if (!elf_alloc_hash(reloc, sec->len / 16))
+		return -1;
+
 	list_for_each_entry(sec, &elf->sections, list) {
 		if ((sec->sh.sh_type != SHT_RELA) &&
 		    (sec->sh.sh_type != SHT_REL))
@@ -600,7 +632,7 @@ static int read_relocs(struct elf *elf)
 			}
 
 			list_add_tail(&reloc->list, &sec->reloc_list);
-			elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+			elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
 
 			nr_reloc++;
 		}
@@ -611,6 +643,7 @@ static int read_relocs(struct elf *elf)
 	if (stats) {
 		printf("max_reloc: %lu\n", max_reloc);
 		printf("tot_reloc: %lu\n", tot_reloc);
+		printf("reloc_bits: %d\n", elf->reloc_bits);
 	}
 
 	return 0;
@@ -632,12 +665,6 @@ struct elf *elf_open_read(const char *na
 
 	INIT_LIST_HEAD(&elf->sections);
 
-	elf_hash_init(elf->symbol_hash);
-	elf_hash_init(elf->symbol_name_hash);
-	elf_hash_init(elf->section_hash);
-	elf_hash_init(elf->section_name_hash);
-	elf_hash_init(elf->reloc_hash);
-
 	elf->fd = open(name, flags);
 	if (elf->fd == -1) {
 		fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -850,8 +877,8 @@ struct section *elf_create_section(struc
 		return NULL;
 
 	list_add_tail(&sec->list, &elf->sections);
-	elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
-	elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+	elf_hash_add(section, &sec->hash, sec->idx);
+	elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
 
 	elf->changed = true;
 
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -84,11 +84,18 @@ struct elf {
 	bool changed;
 	char *name;
 	struct list_head sections;
-	DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS);
+
+	int symbol_bits;
+	int symbol_name_bits;
+	int section_bits;
+	int section_name_bits;
+	int reloc_bits;
+
+	struct hlist_head *symbol_hash;
+	struct hlist_head *symbol_name_hash;
+	struct hlist_head *section_hash;
+	struct hlist_head *section_name_hash;
+	struct hlist_head *reloc_hash;
 };
 
 #define OFFSET_STRIDE_BITS	4



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 02/13] x86,objtool: Dont exclude arch/x86/realmode/
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 01/13] objtool: Rewrite hashtable sizing Peter Zijlstra
@ 2021-05-06 19:33 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] x86, objtool: " tip-bot2 for Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 03/13] jump_label, x86: Strip ASM jump_label support Peter Zijlstra
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:33 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Specifically, init.c uses jump_labels.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/realmode/Makefile |    1 -
 1 file changed, 1 deletion(-)

--- a/arch/x86/realmode/Makefile
+++ b/arch/x86/realmode/Makefile
@@ -10,7 +10,6 @@
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
-OBJECT_FILES_NON_STANDARD	:= y
 
 subdir- := rm
 



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 03/13] jump_label, x86: Strip ASM jump_label support
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 01/13] objtool: Rewrite hashtable sizing Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 02/13] x86,objtool: Dont exclude arch/x86/realmode/ Peter Zijlstra
@ 2021-05-06 19:33 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 04/13] jump_label, x86: Factor out the __jump_table generation Peter Zijlstra
                   ` (9 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:33 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

In preparation for variable size jump_label support; remove all ASM
bits, which are currently unused.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/jump_label.h |   36 ------------------------------------
 1 file changed, 36 deletions(-)

--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -47,42 +47,6 @@ static __always_inline bool arch_static_
 	return true;
 }
 
-#else	/* __ASSEMBLY__ */
-
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
-	.if \def
-	/* Equivalent to "jmp.d32 \target" */
-	.byte		0xe9
-	.long		\target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-	.else
-	.byte		BYTES_NOP5
-	.endif
-	.pushsection __jump_table, "aw"
-	_ASM_ALIGN
-	.long		.Lstatic_jump_\@ - ., \target - .
-	_ASM_PTR	\key - .
-	.popsection
-.endm
-
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
-	.if \def
-	.byte		BYTES_NOP5
-	.else
-	/* Equivalent to "jmp.d32 \target" */
-	.byte		0xe9
-	.long		\target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-	.endif
-	.pushsection __jump_table, "aw"
-	_ASM_ALIGN
-	.long		.Lstatic_jump_\@ - ., \target - .
-	_ASM_PTR	\key + 1 - .
-	.popsection
-.endm
-
 #endif	/* __ASSEMBLY__ */
 
 #endif



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 04/13] jump_label, x86: Factor out the __jump_table generation
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (2 preceding siblings ...)
  2021-05-06 19:33 ` [PATCH 03/13] jump_label, x86: Strip ASM jump_label support Peter Zijlstra
@ 2021-05-06 19:33 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 05/13] jump_label, x86: Improve error when we fail expected text Peter Zijlstra
                   ` (8 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:33 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Both arch_static_branch() and arch_static_branch_jump() have the same
blurb to generate the __jump_table entry, share it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/jump_label.h |   24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -14,15 +14,19 @@
 #include <linux/stringify.h>
 #include <linux/types.h>
 
+#define JUMP_TABLE_ENTRY				\
+	".pushsection __jump_table,  \"aw\" \n\t"	\
+	_ASM_ALIGN "\n\t"				\
+	".long 1b - . \n\t"				\
+	".long %l[l_yes] - . \n\t"			\
+	_ASM_PTR "%c0 + %c1 - .\n\t"			\
+	".popsection \n\t"
+
 static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte " __stringify(BYTES_NOP5) "\n\t"
-		".pushsection __jump_table,  \"aw\" \n\t"
-		_ASM_ALIGN "\n\t"
-		".long 1b - ., %l[l_yes] - . \n\t"
-		_ASM_PTR "%c0 + %c1 - .\n\t"
-		".popsection \n\t"
+		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;
@@ -33,13 +37,9 @@ static __always_inline bool arch_static_
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
-		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
-		"2:\n\t"
-		".pushsection __jump_table,  \"aw\" \n\t"
-		_ASM_ALIGN "\n\t"
-		".long 1b - ., %l[l_yes] - . \n\t"
-		_ASM_PTR "%c0 + %c1 - .\n\t"
-		".popsection \n\t"
+		".byte 0xe9 \n\t"
+		".long %l[l_yes] - (. + 4) \n\t"
+		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 05/13] jump_label, x86: Improve error when we fail expected text
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (3 preceding siblings ...)
  2021-05-06 19:33 ` [PATCH 04/13] jump_label, x86: Factor out the __jump_table generation Peter Zijlstra
@ 2021-05-06 19:33 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 06/13] jump_label, x86: Introduce jump_entry_size() Peter Zijlstra
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:33 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

There is only a single usage site left, remove the function and extend
the print to include more information, like the expected text and the
patch type.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/jump_label.c |   33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,38 +16,33 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
 
-static void bug_at(const void *ip, int line)
-{
-	/*
-	 * The location is not an op that we were expecting.
-	 * Something went wrong. Crash the box, as something could be
-	 * corrupting the kernel.
-	 */
-	pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line);
-	BUG();
-}
-
 static const void *
 __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
 {
 	const void *expect, *code;
 	const void *addr, *dest;
-	int line;
 
 	addr = (void *)jump_entry_code(entry);
 	dest = (void *)jump_entry_target(entry);
 
 	code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
 
-	if (type == JUMP_LABEL_JMP) {
-		expect = x86_nops[5]; line = __LINE__;
-	} else {
-		expect = code; line = __LINE__;
+	if (type == JUMP_LABEL_JMP)
+		expect = x86_nops[5];
+	else
+		expect = code;
+
+	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) {
+		/*
+		 * The location is not an op that we were expecting.
+		 * Something went wrong. Crash the box, as something could be
+		 * corrupting the kernel.
+		 */
+		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) type:%d\n",
+				addr, addr, addr, expect, type);
+		BUG();
 	}
 
-	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE))
-		bug_at(addr, line);
-
 	if (type == JUMP_LABEL_NOP)
 		code = x86_nops[5];
 



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 06/13] jump_label, x86: Introduce jump_entry_size()
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (4 preceding siblings ...)
  2021-05-06 19:33 ` [PATCH 05/13] jump_label, x86: Improve error when we fail expected text Peter Zijlstra
@ 2021-05-06 19:33 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-06 19:33 ` [PATCH 07/13] jump_label, x86: Add variable length patching support Peter Zijlstra
                   ` (6 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:33 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

This allows architectures to have variable sized jumps.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/jump_label.h |    4 ++--
 arch/x86/kernel/jump_label.c      |    7 +++++++
 include/linux/jump_label.h        |    9 +++++++++
 kernel/jump_label.c               |    2 +-
 4 files changed, 19 insertions(+), 3 deletions(-)

--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,8 +4,6 @@
 
 #define HAVE_JUMP_LABEL_BATCH
 
-#define JUMP_LABEL_NOP_SIZE 5
-
 #include <asm/asm.h>
 #include <asm/nops.h>
 
@@ -47,6 +45,8 @@ static __always_inline bool arch_static_
 	return true;
 }
 
+extern int arch_jump_entry_size(struct jump_entry *entry);
+
 #endif	/* __ASSEMBLY__ */
 
 #endif
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,6 +16,13 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
 
+#define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
+
+int arch_jump_entry_size(struct jump_entry *entry)
+{
+	return JMP32_INSN_SIZE;
+}
+
 static const void *
 __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
 {
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -176,6 +176,15 @@ static inline void jump_entry_set_init(s
 	entry->key |= 2;
 }
 
+static inline int jump_entry_size(struct jump_entry *entry)
+{
+#ifdef JUMP_LABEL_NOP_SIZE
+	return JUMP_LABEL_NOP_SIZE;
+#else
+	return arch_jump_entry_size(entry);
+#endif
+}
+
 #endif
 #endif
 
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -309,7 +309,7 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit)
 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 {
 	if (jump_entry_code(entry) <= (unsigned long)end &&
-	    jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
+	    jump_entry_code(entry) + jump_entry_size(entry) > (unsigned long)start)
 		return 1;
 
 	return 0;



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 07/13] jump_label, x86: Add variable length patching support
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (5 preceding siblings ...)
  2021-05-06 19:33 ` [PATCH 06/13] jump_label, x86: Introduce jump_entry_size() Peter Zijlstra
@ 2021-05-06 19:33 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-13 14:16   ` [PATCH 07.5/13] jump_label,x86: Remove unused JUMP_LABEL_NOP_SIZE Peter Zijlstra
  2021-05-06 19:34 ` [PATCH 08/13] jump_label: Free jump_entry::key bit1 for build use Peter Zijlstra
                   ` (5 subsequent siblings)
  12 siblings, 2 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:33 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

This allows the patching to emit 2 byte JMP/NOP instructions in
addition to the 5 byte JMP/NOP we already did. This allows for more
compact code.

This code is not yet used, as we don't emit shorter code at compile
time yet.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/jump_label.c |   53 ++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 18 deletions(-)

--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -23,44 +23,63 @@ int arch_jump_entry_size(struct jump_ent
 	return JMP32_INSN_SIZE;
 }
 
-static const void *
-__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
+struct jump_label_patch {
+	const void *code;
+	int size;
+};
+
+static struct jump_label_patch
+__jump_label_patch(struct jump_entry *entry, enum jump_label_type type)
 {
-	const void *expect, *code;
+	const void *expect, *code, *nop;
 	const void *addr, *dest;
+	int size;
 
 	addr = (void *)jump_entry_code(entry);
 	dest = (void *)jump_entry_target(entry);
 
-	code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
+	size = arch_jump_entry_size(entry);
+	switch (size) {
+	case JMP8_INSN_SIZE:
+		code = text_gen_insn(JMP8_INSN_OPCODE, addr, dest);
+		nop = x86_nops[size];
+		break;
+
+	case JMP32_INSN_SIZE:
+		code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
+		nop = x86_nops[size];
+		break;
+
+	default: BUG();
+	}
 
 	if (type == JUMP_LABEL_JMP)
-		expect = x86_nops[5];
+		expect = nop;
 	else
 		expect = code;
 
-	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) {
+	if (memcmp(addr, expect, size)) {
 		/*
 		 * The location is not an op that we were expecting.
 		 * Something went wrong. Crash the box, as something could be
 		 * corrupting the kernel.
 		 */
-		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) type:%d\n",
-				addr, addr, addr, expect, type);
+		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) size:%d type:%d\n",
+				addr, addr, addr, expect, size, type);
 		BUG();
 	}
 
 	if (type == JUMP_LABEL_NOP)
-		code = x86_nops[5];
+		code = nop;
 
-	return code;
+	return (struct jump_label_patch){.code = code, .size = size};
 }
 
 static inline void __jump_label_transform(struct jump_entry *entry,
 					  enum jump_label_type type,
 					  int init)
 {
-	const void *opcode = __jump_label_set_jump_code(entry, type);
+	const struct jump_label_patch jlp = __jump_label_patch(entry, type);
 
 	/*
 	 * As long as only a single processor is running and the code is still
@@ -74,12 +93,11 @@ static inline void __jump_label_transfor
 	 * always nop being the 'currently valid' instruction
 	 */
 	if (init || system_state == SYSTEM_BOOTING) {
-		text_poke_early((void *)jump_entry_code(entry), opcode,
-				JUMP_LABEL_NOP_SIZE);
+		text_poke_early((void *)jump_entry_code(entry), jlp.code, jlp.size);
 		return;
 	}
 
-	text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
+	text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
 }
 
 static void __ref jump_label_transform(struct jump_entry *entry,
@@ -100,7 +118,7 @@ void arch_jump_label_transform(struct ju
 bool arch_jump_label_transform_queue(struct jump_entry *entry,
 				     enum jump_label_type type)
 {
-	const void *opcode;
+	struct jump_label_patch jlp;
 
 	if (system_state == SYSTEM_BOOTING) {
 		/*
@@ -111,9 +129,8 @@ bool arch_jump_label_transform_queue(str
 	}
 
 	mutex_lock(&text_mutex);
-	opcode = __jump_label_set_jump_code(entry, type);
-	text_poke_queue((void *)jump_entry_code(entry),
-			opcode, JUMP_LABEL_NOP_SIZE, NULL);
+	jlp = __jump_label_patch(entry, type);
+	text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
 	mutex_unlock(&text_mutex);
 	return true;
 }



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 08/13] jump_label: Free jump_entry::key bit1 for build use
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (6 preceding siblings ...)
  2021-05-06 19:33 ` [PATCH 07/13] jump_label, x86: Add variable length patching support Peter Zijlstra
@ 2021-05-06 19:34 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-06 19:34 ` [PATCH 09/13] jump_label,x86: Emit short JMP Peter Zijlstra
                   ` (4 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:34 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Have jump_label_init() set jump_entry::key bit1 to either 0 or 1
unconditionally. This makes it available for build-time games.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/jump_label.h |    7 +++++--
 kernel/jump_label.c        |   10 ++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -171,9 +171,12 @@ static inline bool jump_entry_is_init(co
 	return (unsigned long)entry->key & 2UL;
 }
 
-static inline void jump_entry_set_init(struct jump_entry *entry)
+static inline void jump_entry_set_init(struct jump_entry *entry, bool set)
 {
-	entry->key |= 2;
+	if (set)
+		entry->key |= 2;
+	else
+		entry->key &= ~2;
 }
 
 static inline int jump_entry_size(struct jump_entry *entry)
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -475,13 +475,14 @@ void __init jump_label_init(void)
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
+		bool in_init;
 
 		/* rewrite NOPs */
 		if (jump_label_type(iter) == JUMP_LABEL_NOP)
 			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
 
-		if (init_section_contains((void *)jump_entry_code(iter), 1))
-			jump_entry_set_init(iter);
+		in_init = init_section_contains((void *)jump_entry_code(iter), 1);
+		jump_entry_set_init(iter, in_init);
 
 		iterk = jump_entry_key(iter);
 		if (iterk == key)
@@ -626,9 +627,10 @@ static int jump_label_add_module(struct
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
+		bool in_init;
 
-		if (within_module_init(jump_entry_code(iter), mod))
-			jump_entry_set_init(iter);
+		in_init = within_module_init(jump_entry_code(iter), mod);
+		jump_entry_set_init(iter, in_init);
 
 		iterk = jump_entry_key(iter);
 		if (iterk == key)



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 09/13] jump_label,x86: Emit short JMP
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (7 preceding siblings ...)
  2021-05-06 19:34 ` [PATCH 08/13] jump_label: Free jump_entry::key bit1 for build use Peter Zijlstra
@ 2021-05-06 19:34 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] jump_label, x86: " tip-bot2 for Peter Zijlstra
  2021-05-06 19:34 ` [PATCH 10/13] objtool: Decode jump_entry::key addend Peter Zijlstra
                   ` (3 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:34 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Now that we can patch short JMP/NOP, allow the compiler/assembler to
emit short JMP instructions.

There is no way to have the assembler emit short NOPs based on the
potential displacement, so leave those long for now.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/jump_label.h |    3 +--
 arch/x86/kernel/jump_label.c      |    8 +++++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -35,8 +35,7 @@ static __always_inline bool arch_static_
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
-		".byte 0xe9 \n\t"
-		".long %l[l_yes] - (. + 4) \n\t"
+		"jmp %l[l_yes]\n\t"
 		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -15,12 +15,18 @@
 #include <asm/kprobes.h>
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
+#include <asm/insn.h>
 
 #define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
 
 int arch_jump_entry_size(struct jump_entry *entry)
 {
-	return JMP32_INSN_SIZE;
+	struct insn insn = {};
+
+	insn_decode_kernel(&insn, (void *)jump_entry_code(entry));
+	BUG_ON(insn.length != 2 && insn.length != 5);
+
+	return insn.length;
 }
 
 struct jump_label_patch {



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 10/13] objtool: Decode jump_entry::key addend
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (8 preceding siblings ...)
  2021-05-06 19:34 ` [PATCH 09/13] jump_label,x86: Emit short JMP Peter Zijlstra
@ 2021-05-06 19:34 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-06 19:34 ` [PATCH 11/13] objtool: Rewrite jump_label instructions Peter Zijlstra
                   ` (2 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:34 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Teach objtool about the low bits in the struct static_key pointer.

That is, the low two bits of @key in:

  struct jump_entry {
	s32 code;
	s32 target;
	long key;
  }

as found in the __jump_table section. Since @key has a relocation to
the variable (to be resolved by the linker), the low two bits will be
reflected in the relocation's addend.

As such, find the reloc and store the addend, such that we can access
these bits.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/arch/x86/include/arch/special.h |    1 +
 tools/objtool/include/objtool/special.h       |    1 +
 tools/objtool/special.c                       |   14 ++++++++++++++
 3 files changed, 16 insertions(+)

--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -9,6 +9,7 @@
 #define JUMP_ENTRY_SIZE		16
 #define JUMP_ORIG_OFFSET	0
 #define JUMP_NEW_OFFSET		4
+#define JUMP_KEY_OFFSET		8
 
 #define ALT_ENTRY_SIZE		12
 #define ALT_ORIG_OFFSET		0
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -27,6 +27,7 @@ struct special_alt {
 	unsigned long new_off;
 
 	unsigned int orig_len, new_len; /* group only */
+	u8 key_addend;
 };
 
 int special_get_alts(struct elf *elf, struct list_head *alts);
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -23,6 +23,7 @@ struct special_entry {
 	unsigned char size, orig, new;
 	unsigned char orig_len, new_len; /* group only */
 	unsigned char feature; /* ALTERNATIVE macro CPU feature */
+	unsigned char key; /* jump_label key */
 };
 
 struct special_entry entries[] = {
@@ -42,6 +43,7 @@ struct special_entry entries[] = {
 		.size = JUMP_ENTRY_SIZE,
 		.orig = JUMP_ORIG_OFFSET,
 		.new = JUMP_NEW_OFFSET,
+		.key = JUMP_KEY_OFFSET,
 	},
 	{
 		.sec = "__ex_table",
@@ -122,6 +124,18 @@ static int get_alt_entry(struct elf *elf
 			alt->new_off -= 0x7ffffff0;
 	}
 
+	if (entry->key) {
+		struct reloc *key_reloc;
+
+		key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key);
+		if (!key_reloc) {
+			WARN_FUNC("can't find key reloc",
+				  sec, offset + entry->key);
+			return -1;
+		}
+		alt->key_addend = key_reloc->addend;
+	}
+
 	return 0;
 }
 



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 11/13] objtool: Rewrite jump_label instructions
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (9 preceding siblings ...)
  2021-05-06 19:34 ` [PATCH 10/13] objtool: Decode jump_entry::key addend Peter Zijlstra
@ 2021-05-06 19:34 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-06 19:34 ` [PATCH 12/13] objtool: Provide stats for jump_labels Peter Zijlstra
  2021-05-06 19:34 ` [PATCH 13/13] jump_label,x86: Allow short NOPs Peter Zijlstra
  12 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:34 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

When a jump_entry::key has bit1 set, rewrite the instruction to be a
NOP. This allows the compiler/assembler to emit JMP (and thus decide
on which encoding to use).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1223,6 +1223,20 @@ static int handle_jump_alt(struct objtoo
 		return -1;
 	}
 
+	if (special_alt->key_addend & 2) {
+		struct reloc *reloc = insn_reloc(file, orig_insn);
+
+		if (reloc) {
+			reloc->type = R_NONE;
+			elf_write_reloc(file->elf, reloc);
+		}
+		elf_write_insn(file->elf, orig_insn->sec,
+			       orig_insn->offset, orig_insn->len,
+			       arch_nop_insn(orig_insn->len));
+		orig_insn->type = INSN_NOP;
+		return 0;
+	}
+
 	*new_insn = list_next_entry(orig_insn, list);
 	return 0;
 }



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 12/13] objtool: Provide stats for jump_labels
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (10 preceding siblings ...)
  2021-05-06 19:34 ` [PATCH 11/13] objtool: Rewrite jump_label instructions Peter Zijlstra
@ 2021-05-06 19:34 ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-05-13 14:15   ` [PATCH 12.5/13] objtool: Reflow handle_jump_alt() Peter Zijlstra
  2021-05-06 19:34 ` [PATCH 13/13] jump_label,x86: Allow short NOPs Peter Zijlstra
  12 siblings, 2 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:34 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Add objtool --stats to count the jump_label sites it encounters.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c                   |   22 ++++++++++++++++++++--
 tools/objtool/include/objtool/objtool.h |    3 +++
 2 files changed, 23 insertions(+), 2 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1225,8 +1225,15 @@ static int handle_jump_alt(struct objtoo
 			   struct instruction *orig_insn,
 			   struct instruction **new_insn)
 {
-	if (orig_insn->type == INSN_NOP)
+	if (orig_insn->type == INSN_NOP) {
+do_nop:
+		if (orig_insn->len == 2)
+			file->jl_nop_short++;
+		else
+			file->jl_nop_long++;
+
 		return 0;
+	}
 
 	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
 		WARN_FUNC("unsupported instruction at jump label",
@@ -1245,9 +1252,14 @@ static int handle_jump_alt(struct objtoo
 			       orig_insn->offset, orig_insn->len,
 			       arch_nop_insn(orig_insn->len));
 		orig_insn->type = INSN_NOP;
-		return 0;
+		goto do_nop;
 	}
 
+	if (orig_insn->len == 2)
+		file->jl_short++;
+	else
+		file->jl_long++;
+
 	*new_insn = list_next_entry(orig_insn, list);
 	return 0;
 }
@@ -1328,6 +1340,12 @@ static int add_special_section_alts(stru
 		free(special_alt);
 	}
 
+	if (stats) {
+		printf("jl\\\tNOP\tJMP\n");
+		printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
+		printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
+	}
+
 out:
 	return ret;
 }
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -22,6 +22,9 @@ struct objtool_file {
 	struct list_head static_call_list;
 	struct list_head mcount_loc_list;
 	bool ignore_unreachables, c_file, hints, rodata;
+
+	unsigned long jl_short, jl_long;
+	unsigned long jl_nop_short, jl_nop_long;
 };
 
 struct objtool_file *objtool_open_read(const char *_objname);



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 13/13] jump_label,x86: Allow short NOPs
  2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
                   ` (11 preceding siblings ...)
  2021-05-06 19:34 ` [PATCH 12/13] objtool: Provide stats for jump_labels Peter Zijlstra
@ 2021-05-06 19:34 ` Peter Zijlstra
  2021-05-06 19:49   ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] jump_label, x86: " tip-bot2 for Peter Zijlstra
  12 siblings, 2 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:34 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel, peterz

Now that objtool is able to rewrite jump_label instructions, have the
compiler emit a JMP, such that it can decide on the optimal encoding,
and set jump_entry::key bit1 to indicate that objtool should rewrite
the instruction to a matching NOP.

For x86_64-allyesconfig this gives:

jl\     NOP     JMP
short:  22997   124
long:   30874   90

IOW, we save (22997+124) * 3 bytes of kernel text in hotpaths.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/jump_label.h |   18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -28,6 +28,22 @@
 	_ASM_PTR "%c0 + %c1 - .\n\t"			\
 	".popsection \n\t"
 
+#ifdef CONFIG_STACK_VALIDATION
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:"
+		"jmp %l[l_yes] # objtool NOPs this \n\t"
+		JUMP_TABLE_ENTRY
+		: :  "i" (key), "i" (2 | branch) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+#else
+
 static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
@@ -40,6 +56,8 @@ static __always_inline bool arch_static_
 	return true;
 }
 
+#endif /* STACK_VALIDATION */
+
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 13/13] jump_label,x86: Allow short NOPs
  2021-05-06 19:34 ` [PATCH 13/13] jump_label,x86: Allow short NOPs Peter Zijlstra
@ 2021-05-06 19:49   ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] jump_label, x86: " tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-06 19:49 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel

On Thu, May 06, 2021 at 09:34:05PM +0200, Peter Zijlstra wrote:
> +#ifdef CONFIG_STACK_VALIDATION

Do we want something like this?

---
--- a/Documentation/x86/orc-unwinder.rst
+++ b/Documentation/x86/orc-unwinder.rst
@@ -15,7 +15,7 @@ the ORC unwinder to be much simpler and
 The ORC data consists of unwind tables which are generated by objtool.
 They contain out-of-band data which is used by the in-kernel ORC
 unwinder.  Objtool generates the ORC data by first doing compile-time
-stack metadata validation (CONFIG_STACK_VALIDATION).  After analyzing
+stack metadata validation (CONFIG_OBJTOOL).  After analyzing
 all the code paths of a .o file, it determines information about the
 stack state at each instruction address in the file and outputs that
 information to the .orc_unwind and .orc_unwind_ip sections.
--- a/Makefile
+++ b/Makefile
@@ -1091,12 +1091,12 @@ HOST_LIBELF_LIBS = $(shell pkg-config li
 has_libelf = $(call try-run,\
                echo "int main() {}" | $(HOSTCC) $(KBUILD_HOSTLDFLAGS) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0)
 
-ifdef CONFIG_STACK_VALIDATION
+ifdef CONFIG_OBJTOOL
   ifeq ($(has_libelf),1)
     objtool_target := tools/objtool FORCE
   else
-    SKIP_STACK_VALIDATION := 1
-    export SKIP_STACK_VALIDATION
+    SKIP_OBJTOOL := 1
+    export SKIP_OBJTOOL
   endif
 endif
 
@@ -1247,7 +1247,7 @@ asm-generic: uapi-asm-generic
 
 PHONY += prepare-objtool prepare-resolve_btfids
 prepare-objtool: $(objtool_target)
-ifeq ($(SKIP_STACK_VALIDATION),1)
+ifeq ($(SKIP_OBJTOOL),1)
 ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
 	@echo "error: Cannot generate __mcount_loc for CONFIG_DYNAMIC_FTRACE=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
 	@false
@@ -1256,7 +1256,7 @@ ifdef CONFIG_UNWINDER_ORC
 	@echo "error: Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
 	@false
 else
-	@echo "warning: Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
+	@echo "warning: Cannot use CONFIG_OBJTOOL=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
 endif
 endif
 
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -984,7 +984,7 @@ config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LA
 	depends on MMU
 	select ARCH_HAS_ELF_RANDOMIZE
 
-config HAVE_STACK_VALIDATION
+config HAVE_OBJTOOL
 	bool
 	help
 	  Architecture supports the 'objtool check' host tool command, which
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -76,7 +76,7 @@ config X86
 	select ARCH_HAS_FILTER_PGPROT
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
-	select ARCH_HAS_KCOV			if X86_64 && STACK_VALIDATION
+	select ARCH_HAS_KCOV			if X86_64 && OBJTOOL
 	select ARCH_HAS_MEM_ENCRYPT
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
@@ -181,7 +181,7 @@ config X86
 	select HAVE_CONTEXT_TRACKING		if X86_64
 	select HAVE_CONTEXT_TRACKING_OFFSTACK	if HAVE_CONTEXT_TRACKING
 	select HAVE_C_RECORDMCOUNT
-	select HAVE_OBJTOOL_MCOUNT		if STACK_VALIDATION
+	select HAVE_OBJTOOL_MCOUNT		if OBJTOOL
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
@@ -232,13 +232,13 @@ config X86
 	select MMU_GATHER_RCU_TABLE_FREE		if PARAVIRT
 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
+	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && OBJTOOL
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
-	select HAVE_STACK_VALIDATION		if X86_64
+	select HAVE_OBJTOOL		if X86_64
 	select HAVE_STATIC_CALL
-	select HAVE_STATIC_CALL_INLINE		if HAVE_STACK_VALIDATION
+	select HAVE_STATIC_CALL_INLINE		if HAVE_OBJTOOL
 	select HAVE_PREEMPT_DYNAMIC
 	select HAVE_RSEQ
 	select HAVE_SYSCALL_TRACEPOINTS
@@ -255,7 +255,7 @@ config X86
 	select RTC_MC146818_LIB
 	select SPARSE_IRQ
 	select SRCU
-	select STACK_VALIDATION			if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
+	select OBJTOOL			if HAVE_OBJTOOL && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select USER_STACKTRACE_SUPPORT
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -240,7 +240,7 @@ choice
 config UNWINDER_ORC
 	bool "ORC unwinder"
 	depends on X86_64
-	select STACK_VALIDATION
+	select OBJTOOL
 	help
 	  This option enables the ORC (Oops Rewind Capability) unwinder for
 	  unwinding kernel stack traces.  It uses a custom data format which is
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -20,7 +20,7 @@
 	_ASM_PTR "%c0 + %c1 - .\n\t"			\
 	".popsection \n\t"
 
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_OBJTOOL
 
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
@@ -48,7 +48,7 @@ static __always_inline bool arch_static_
 	return true;
 }
 
-#endif /* STACK_VALIDATION */
+#endif /* OBJTOOL */
 
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -109,7 +109,7 @@ void ftrace_likely_update(struct ftrace_
 #endif
 
 /* Unreachable code */
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_OBJTOOL
 /*
  * These macros help objtool understand GCC code flow for unreachable code.
  * The __COUNTER__ based labels are a hack to make each instance of the macros
--- a/include/linux/instrumentation.h
+++ b/include/linux/instrumentation.h
@@ -2,7 +2,7 @@
 #ifndef __LINUX_INSTRUMENTATION_H
 #define __LINUX_INSTRUMENTATION_H
 
-#if defined(CONFIG_DEBUG_ENTRY) && defined(CONFIG_STACK_VALIDATION)
+#if defined(CONFIG_DEBUG_ENTRY) && defined(CONFIG_OBJTOOL)
 
 /* Begin/end of an instrumentation safe region */
 #define instrumentation_begin() ({					\
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -38,7 +38,7 @@ struct unwind_hint {
 #define UNWIND_HINT_TYPE_REGS_PARTIAL	2
 #define UNWIND_HINT_TYPE_FUNC		3
 
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_OBJTOOL
 
 #ifndef __ASSEMBLY__
 
@@ -120,7 +120,7 @@ struct unwind_hint {
 
 #endif /* __ASSEMBLY__ */
 
-#else /* !CONFIG_STACK_VALIDATION */
+#else /* !CONFIG_OBJTOOL */
 
 #ifndef __ASSEMBLY__
 
@@ -135,6 +135,6 @@ struct unwind_hint {
 .endm
 #endif
 
-#endif /* CONFIG_STACK_VALIDATION */
+#endif /* CONFIG_OBJTOOL */
 
 #endif /* _LINUX_OBJTOOL_H */
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -428,9 +428,9 @@ config FRAME_POINTER
 	  larger and slower, but it gives very useful debugging information
 	  in case of kernel bugs. (precise oopses/stacktraces/warnings)
 
-config STACK_VALIDATION
+config OBJTOOL
 	bool "Compile-time stack metadata validation"
-	depends on HAVE_STACK_VALIDATION
+	depends on HAVE_OBJTOOL
 	default n
 	help
 	  Add compile-time checks to validate stack metadata, including frame
@@ -445,7 +445,7 @@ config STACK_VALIDATION
 
 config VMLINUX_VALIDATION
 	bool
-	depends on STACK_VALIDATION && DEBUG_ENTRY && !PARAVIRT
+	depends on OBJTOOL && DEBUG_ENTRY && !PARAVIRT
 	default y
 
 config VMLINUX_MAP
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -106,7 +106,7 @@ config UBSAN_UNREACHABLE
 	bool "Perform checking for unreachable code"
 	# objtool already handles unreachable checking and gets angry about
 	# seeing UBSan instrumentation located in unreachable places.
-	depends on !STACK_VALIDATION
+	depends on !OBJTOOL
 	depends on $(cc-option,-fsanitize=unreachable)
 	help
 	  This option enables -fsanitize=unreachable which checks for control
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -217,9 +217,9 @@ cmd_record_mcount = $(if $(findstring $(
 	$(sub_cmd_record_mcount))
 endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
 
-ifdef CONFIG_STACK_VALIDATION
+ifdef CONFIG_OBJTOOL
 ifndef CONFIG_LTO_CLANG
-ifneq ($(SKIP_STACK_VALIDATION),1)
+ifneq ($(SKIP_OBJTOOL),1)
 
 __objtool_obj := $(objtree)/tools/objtool/objtool
 
@@ -233,14 +233,14 @@ objtool_obj = $(if $(patsubst y%,, \
 	$(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \
 	$(__objtool_obj))
 
-endif # SKIP_STACK_VALIDATION
+endif # SKIP_OBJTOOL
 endif # CONFIG_LTO_CLANG
-endif # CONFIG_STACK_VALIDATION
+endif # CONFIG_OBJTOOL
 
 # Rebuild all objects when objtool changes, or is enabled/disabled.
 objtool_dep = $(objtool_obj)					\
 	      $(wildcard include/config/ORC_UNWINDER		\
-			 include/config/STACK_VALIDATION)
+			 include/config/OBJTOOL)
 
 ifdef CONFIG_TRIM_UNUSED_KSYMS
 cmd_gen_ksymdeps = \
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -38,14 +38,14 @@ prelink-ext := .lto
 # ELF processing was skipped earlier because we didn't have native code,
 # so let's now process the prelinked binary before we link the module.
 
-ifdef CONFIG_STACK_VALIDATION
-ifneq ($(SKIP_STACK_VALIDATION),1)
+ifdef CONFIG_OBJTOOL
+ifneq ($(SKIP_OBJTOOL),1)
 cmd_ld_ko_o +=								\
 	$(objtree)/tools/objtool/objtool $(objtool_args)		\
 		$(@:.ko=$(prelink-ext).o);
 
-endif # SKIP_STACK_VALIDATION
-endif # CONFIG_STACK_VALIDATION
+endif # SKIP_OBJTOOL
+endif # CONFIG_OBJTOOL
 
 endif # CONFIG_LTO_CLANG
 
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -106,7 +106,7 @@ objtool_link()
 	local objtoolcmd;
 	local objtoolopt;
 
-	if [ "${CONFIG_LTO_CLANG} ${CONFIG_STACK_VALIDATION}" = "y y" ]; then
+	if [ "${CONFIG_LTO_CLANG} ${CONFIG_OBJTOOL}" = "y y" ]; then
 		# Don't perform vmlinux validation unless explicitly requested,
 		# but run objtool on vmlinux.o now that we have an object file.
 		if [ -n "${CONFIG_UNWINDER_ORC}" ]; then
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -67,7 +67,7 @@ deploy_kernel_headers () {
 	) > debian/hdrsrcfiles
 
 	{
-		if is_enabled CONFIG_STACK_VALIDATION; then
+		if is_enabled CONFIG_OBJTOOL; then
 			echo tools/objtool/objtool
 		fi
 
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -38,7 +38,7 @@ struct unwind_hint {
 #define UNWIND_HINT_TYPE_REGS_PARTIAL	2
 #define UNWIND_HINT_TYPE_FUNC		3
 
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_OBJTOOL
 
 #ifndef __ASSEMBLY__
 
@@ -120,7 +120,7 @@ struct unwind_hint {
 
 #endif /* __ASSEMBLY__ */
 
-#else /* !CONFIG_STACK_VALIDATION */
+#else /* !CONFIG_OBJTOOL */
 
 #ifndef __ASSEMBLY__
 
@@ -135,6 +135,6 @@ struct unwind_hint {
 .endm
 #endif
 
-#endif /* CONFIG_STACK_VALIDATION */
+#endif /* CONFIG_OBJTOOL */
 
 #endif /* _LINUX_OBJTOOL_H */
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -5,7 +5,7 @@ Compile-time stack metadata validation
 Overview
 --------
 
-The kernel CONFIG_STACK_VALIDATION option enables a host tool named
+The kernel CONFIG_OBJTOOL option enables a host tool named
 objtool which runs at compile time.  It has a "check" subcommand which
 analyzes every .o file and ensures the validity of its stack metadata.
 It enforces a set of rules on asm code and C inline assembly code so
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -1,6 +1,6 @@
 CONFIG_LOCALVERSION="-debug"
 CONFIG_FRAME_POINTER=y
-CONFIG_STACK_VALIDATION=y
+CONFIG_OBJTOOL=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF4=y

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-05-06 19:33 ` [PATCH 01/13] objtool: Rewrite hashtable sizing Peter Zijlstra
@ 2021-05-12 10:41   ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2021-06-10 18:14   ` [PATCH 01/13] " Nathan Chancellor
  2 siblings, 0 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-12 10:41 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel

On Thu, May 06, 2021 at 09:33:53PM +0200, Peter Zijlstra wrote:
> @@ -343,6 +360,10 @@ static int read_symbols(struct elf *elf)
>  
>  	symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
>  
> +	if (!elf_alloc_hash(symbol, symbols_nr) ||
> +	    !elf_alloc_hash(symbol_name, symbols_nr))
> +		return -1;
> +
>  	for (i = 0; i < symbols_nr; i++) {
>  		sym = malloc(sizeof(*sym));
>  		if (!sym) {

Ingo ran into the case of an empty file without a .symtab (with as-2.36.1),
which means we don't even allocate the symbol hashes, which then explodes
later.

The below seems to fix things.

---
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 6942357cd4a2..60bef847ee85 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -340,25 +340,19 @@ static int read_symbols(struct elf *elf)
 {
 	struct section *symtab, *symtab_shndx, *sec;
 	struct symbol *sym, *pfunc;
-	int symbols_nr, i;
+	int i, symbols_nr = 0;
 	char *coldstr;
 	Elf_Data *shndx_data = NULL;
 	Elf32_Word shndx;
 
 	symtab = find_section_by_name(elf, ".symtab");
-	if (!symtab) {
-		/*
-		 * A missing symbol table is actually possible if it's an empty
-		 * .o file.  This can happen for thunk_64.o.
-		 */
-		return 0;
-	}
-
-	symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
-	if (symtab_shndx)
-		shndx_data = symtab_shndx->data;
+	if (symtab) {
+		symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+		if (symtab_shndx)
+			shndx_data = symtab_shndx->data;
 
-	symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+		symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+	}
 
 	if (!elf_alloc_hash(symbol, symbols_nr) ||
 	    !elf_alloc_hash(symbol_name, symbols_nr))

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] objtool: Provide stats for jump_labels
  2021-05-06 19:34 ` [PATCH 12/13] objtool: Provide stats for jump_labels Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  2021-05-13 14:15   ` [PATCH 12.5/13] objtool: Reflow handle_jump_alt() Peter Zijlstra
  1 sibling, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     e2d9494beff21a26438eb611c260b8a6c2dc4dbf
Gitweb:        https://git.kernel.org/tip/e2d9494beff21a26438eb611c260b8a6c2dc4dbf
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:34:04 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:56 +02:00

objtool: Provide stats for jump_labels

Add objtool --stats to count the jump_label sites it encounters.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194158.153101906@infradead.org
---
 tools/objtool/check.c                   | 22 ++++++++++++++++++++--
 tools/objtool/include/objtool/objtool.h |  3 +++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 98cf87f..2c6a93e 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1225,8 +1225,15 @@ static int handle_jump_alt(struct objtool_file *file,
 			   struct instruction *orig_insn,
 			   struct instruction **new_insn)
 {
-	if (orig_insn->type == INSN_NOP)
+	if (orig_insn->type == INSN_NOP) {
+do_nop:
+		if (orig_insn->len == 2)
+			file->jl_nop_short++;
+		else
+			file->jl_nop_long++;
+
 		return 0;
+	}
 
 	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
 		WARN_FUNC("unsupported instruction at jump label",
@@ -1245,9 +1252,14 @@ static int handle_jump_alt(struct objtool_file *file,
 			       orig_insn->offset, orig_insn->len,
 			       arch_nop_insn(orig_insn->len));
 		orig_insn->type = INSN_NOP;
-		return 0;
+		goto do_nop;
 	}
 
+	if (orig_insn->len == 2)
+		file->jl_short++;
+	else
+		file->jl_long++;
+
 	*new_insn = list_next_entry(orig_insn, list);
 	return 0;
 }
@@ -1328,6 +1340,12 @@ static int add_special_section_alts(struct objtool_file *file)
 		free(special_alt);
 	}
 
+	if (stats) {
+		printf("jl\\\tNOP\tJMP\n");
+		printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
+		printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
+	}
+
 out:
 	return ret;
 }
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index e4084af..24fa836 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -22,6 +22,9 @@ struct objtool_file {
 	struct list_head static_call_list;
 	struct list_head mcount_loc_list;
 	bool ignore_unreachables, c_file, hints, rodata;
+
+	unsigned long jl_short, jl_long;
+	unsigned long jl_nop_short, jl_nop_long;
 };
 
 struct objtool_file *objtool_open_read(const char *_objname);

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-05-06 19:34 ` [PATCH 13/13] jump_label,x86: Allow short NOPs Peter Zijlstra
  2021-05-06 19:49   ` Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  2021-05-18 19:50     ` Peter Zijlstra
  1 sibling, 1 reply; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     ab3257042c26d0cd44793c741e2f89bf38b21fe8
Gitweb:        https://git.kernel.org/tip/ab3257042c26d0cd44793c741e2f89bf38b21fe8
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:34:05 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:56 +02:00

jump_label, x86: Allow short NOPs

Now that objtool is able to rewrite jump_label instructions, have the
compiler emit a JMP, such that it can decide on the optimal encoding,
and set jump_entry::key bit1 to indicate that objtool should rewrite
the instruction to a matching NOP.

For x86_64-allyesconfig this gives:

  jl\     NOP     JMP
  short:  22997   124
  long:   30874   90

IOW, we save (22997+124) * 3 bytes of kernel text in hotpaths.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194158.216763632@infradead.org
---
 arch/x86/include/asm/jump_label.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index ef819e3..0449b12 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -20,6 +20,22 @@
 	_ASM_PTR "%c0 + %c1 - .\n\t"			\
 	".popsection \n\t"
 
+#ifdef CONFIG_STACK_VALIDATION
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:"
+		"jmp %l[l_yes] # objtool NOPs this \n\t"
+		JUMP_TABLE_ENTRY
+		: :  "i" (key), "i" (2 | branch) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+#else
+
 static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
@@ -32,6 +48,8 @@ l_yes:
 	return true;
 }
 
+#endif /* STACK_VALIDATION */
+
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] objtool: Rewrite jump_label instructions
  2021-05-06 19:34 ` [PATCH 11/13] objtool: Rewrite jump_label instructions Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     6d37b83c5d79ef5996cc49c3e3ac3d8ecd8c7050
Gitweb:        https://git.kernel.org/tip/6d37b83c5d79ef5996cc49c3e3ac3d8ecd8c7050
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:34:03 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:56 +02:00

objtool: Rewrite jump_label instructions

When a jump_entry::key has bit1 set, rewrite the instruction to be a
NOP. This allows the compiler/assembler to emit JMP (and thus decide
on which encoding to use).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194158.091028792@infradead.org
---
 tools/objtool/check.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9ed1a4c..98cf87f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1234,6 +1234,20 @@ static int handle_jump_alt(struct objtool_file *file,
 		return -1;
 	}
 
+	if (special_alt->key_addend & 2) {
+		struct reloc *reloc = insn_reloc(file, orig_insn);
+
+		if (reloc) {
+			reloc->type = R_NONE;
+			elf_write_reloc(file->elf, reloc);
+		}
+		elf_write_insn(file->elf, orig_insn->sec,
+			       orig_insn->offset, orig_insn->len,
+			       arch_nop_insn(orig_insn->len));
+		orig_insn->type = INSN_NOP;
+		return 0;
+	}
+
 	*new_insn = list_next_entry(orig_insn, list);
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] objtool: Decode jump_entry::key addend
  2021-05-06 19:34 ` [PATCH 10/13] objtool: Decode jump_entry::key addend Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     cbf82a3dc241aea82b941a872ed5c52f6af527ea
Gitweb:        https://git.kernel.org/tip/cbf82a3dc241aea82b941a872ed5c52f6af527ea
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:34:02 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:55 +02:00

objtool: Decode jump_entry::key addend

Teach objtool about the low bits in the struct static_key pointer.

That is, the low two bits of @key in:

  struct jump_entry {
	s32 code;
	s32 target;
	long key;
  }

as found in the __jump_table section. Since @key has a relocation to
the variable (to be resolved by the linker), the low two bits will be
reflected in the relocation's addend.

As such, find the reloc and store the addend, such that we can access
these bits.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194158.028024143@infradead.org
---
 tools/objtool/arch/x86/include/arch/special.h |  1 +
 tools/objtool/include/objtool/special.h       |  1 +
 tools/objtool/special.c                       | 14 ++++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h
index 14271cc..f2918f7 100644
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -9,6 +9,7 @@
 #define JUMP_ENTRY_SIZE		16
 #define JUMP_ORIG_OFFSET	0
 #define JUMP_NEW_OFFSET		4
+#define JUMP_KEY_OFFSET		8
 
 #define ALT_ENTRY_SIZE		12
 #define ALT_ORIG_OFFSET		0
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 8a09f4e..dc4721e 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -27,6 +27,7 @@ struct special_alt {
 	unsigned long new_off;
 
 	unsigned int orig_len, new_len; /* group only */
+	u8 key_addend;
 };
 
 int special_get_alts(struct elf *elf, struct list_head *alts);
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 07b21cf..bc925cf 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -23,6 +23,7 @@ struct special_entry {
 	unsigned char size, orig, new;
 	unsigned char orig_len, new_len; /* group only */
 	unsigned char feature; /* ALTERNATIVE macro CPU feature */
+	unsigned char key; /* jump_label key */
 };
 
 struct special_entry entries[] = {
@@ -42,6 +43,7 @@ struct special_entry entries[] = {
 		.size = JUMP_ENTRY_SIZE,
 		.orig = JUMP_ORIG_OFFSET,
 		.new = JUMP_NEW_OFFSET,
+		.key = JUMP_KEY_OFFSET,
 	},
 	{
 		.sec = "__ex_table",
@@ -122,6 +124,18 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 			alt->new_off -= 0x7ffffff0;
 	}
 
+	if (entry->key) {
+		struct reloc *key_reloc;
+
+		key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key);
+		if (!key_reloc) {
+			WARN_FUNC("can't find key reloc",
+				  sec, offset + entry->key);
+			return -1;
+		}
+		alt->key_addend = key_reloc->addend;
+	}
+
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label, x86: Emit short JMP
  2021-05-06 19:34 ` [PATCH 09/13] jump_label,x86: Emit short JMP Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     e7bf1ba97afdde75b0ef43e4bdb718bf843613f1
Gitweb:        https://git.kernel.org/tip/e7bf1ba97afdde75b0ef43e4bdb718bf843613f1
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:34:01 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:55 +02:00

jump_label, x86: Emit short JMP

Now that we can patch short JMP/NOP, allow the compiler/assembler to
emit short JMP instructions.

There is no way to have the assembler emit short NOPs based on the
potential displacement, so leave those long for now.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.967034497@infradead.org
---
 arch/x86/include/asm/jump_label.h | 3 +--
 arch/x86/kernel/jump_label.c      | 8 +++++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index d85802a..ef819e3 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -35,8 +35,7 @@ l_yes:
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
-		".byte 0xe9 \n\t"
-		".long %l[l_yes] - (. + 4) \n\t"
+		"jmp %l[l_yes]\n\t"
 		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 190d810..a762dc1 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -15,12 +15,18 @@
 #include <asm/kprobes.h>
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
+#include <asm/insn.h>
 
 #define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
 
 int arch_jump_entry_size(struct jump_entry *entry)
 {
-	return JMP32_INSN_SIZE;
+	struct insn insn = {};
+
+	insn_decode_kernel(&insn, (void *)jump_entry_code(entry));
+	BUG_ON(insn.length != 2 && insn.length != 5);
+
+	return insn.length;
 }
 
 struct jump_label_patch {

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label: Free jump_entry::key bit1 for build use
  2021-05-06 19:34 ` [PATCH 08/13] jump_label: Free jump_entry::key bit1 for build use Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     5af0ea293d78c8b8f0b87ae2b13f7ac584057bc3
Gitweb:        https://git.kernel.org/tip/5af0ea293d78c8b8f0b87ae2b13f7ac584057bc3
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:34:00 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:55 +02:00

jump_label: Free jump_entry::key bit1 for build use

Have jump_label_init() set jump_entry::key bit1 to either 0 or 1
unconditionally. This makes it available for build-time games.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.906893264@infradead.org
---
 include/linux/jump_label.h |  7 +++++--
 kernel/jump_label.c        | 10 ++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 8c45f58..48b9b2a 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -171,9 +171,12 @@ static inline bool jump_entry_is_init(const struct jump_entry *entry)
 	return (unsigned long)entry->key & 2UL;
 }
 
-static inline void jump_entry_set_init(struct jump_entry *entry)
+static inline void jump_entry_set_init(struct jump_entry *entry, bool set)
 {
-	entry->key |= 2;
+	if (set)
+		entry->key |= 2;
+	else
+		entry->key &= ~2;
 }
 
 static inline int jump_entry_size(struct jump_entry *entry)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 521cafc..bdb0681 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -483,13 +483,14 @@ void __init jump_label_init(void)
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
+		bool in_init;
 
 		/* rewrite NOPs */
 		if (jump_label_type(iter) == JUMP_LABEL_NOP)
 			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
 
-		if (init_section_contains((void *)jump_entry_code(iter), 1))
-			jump_entry_set_init(iter);
+		in_init = init_section_contains((void *)jump_entry_code(iter), 1);
+		jump_entry_set_init(iter, in_init);
 
 		iterk = jump_entry_key(iter);
 		if (iterk == key)
@@ -634,9 +635,10 @@ static int jump_label_add_module(struct module *mod)
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
+		bool in_init;
 
-		if (within_module_init(jump_entry_code(iter), mod))
-			jump_entry_set_init(iter);
+		in_init = within_module_init(jump_entry_code(iter), mod);
+		jump_entry_set_init(iter, in_init);
 
 		iterk = jump_entry_key(iter);
 		if (iterk == key)

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label, x86: Add variable length patching support
  2021-05-06 19:33 ` [PATCH 07/13] jump_label, x86: Add variable length patching support Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  2021-05-13 14:16   ` [PATCH 07.5/13] jump_label,x86: Remove unused JUMP_LABEL_NOP_SIZE Peter Zijlstra
  1 sibling, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     001951bea748d3f675e1778f42b17290a8c551bf
Gitweb:        https://git.kernel.org/tip/001951bea748d3f675e1778f42b17290a8c551bf
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:33:59 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:55 +02:00

jump_label, x86: Add variable length patching support

This allows the patching to emit 2 byte JMP/NOP instructions in
addition to the 5 byte JMP/NOP we already did. This allows for more
compact code.

This code is not yet used, as we don't emit shorter code at compile
time yet.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.846870383@infradead.org
---
 arch/x86/kernel/jump_label.c | 53 +++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index a29eecc..190d810 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -23,44 +23,63 @@ int arch_jump_entry_size(struct jump_entry *entry)
 	return JMP32_INSN_SIZE;
 }
 
-static const void *
-__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
+struct jump_label_patch {
+	const void *code;
+	int size;
+};
+
+static struct jump_label_patch
+__jump_label_patch(struct jump_entry *entry, enum jump_label_type type)
 {
-	const void *expect, *code;
+	const void *expect, *code, *nop;
 	const void *addr, *dest;
+	int size;
 
 	addr = (void *)jump_entry_code(entry);
 	dest = (void *)jump_entry_target(entry);
 
-	code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
+	size = arch_jump_entry_size(entry);
+	switch (size) {
+	case JMP8_INSN_SIZE:
+		code = text_gen_insn(JMP8_INSN_OPCODE, addr, dest);
+		nop = x86_nops[size];
+		break;
+
+	case JMP32_INSN_SIZE:
+		code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
+		nop = x86_nops[size];
+		break;
+
+	default: BUG();
+	}
 
 	if (type == JUMP_LABEL_JMP)
-		expect = x86_nops[5];
+		expect = nop;
 	else
 		expect = code;
 
-	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) {
+	if (memcmp(addr, expect, size)) {
 		/*
 		 * The location is not an op that we were expecting.
 		 * Something went wrong. Crash the box, as something could be
 		 * corrupting the kernel.
 		 */
-		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) type:%d\n",
-				addr, addr, addr, expect, type);
+		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) size:%d type:%d\n",
+				addr, addr, addr, expect, size, type);
 		BUG();
 	}
 
 	if (type == JUMP_LABEL_NOP)
-		code = x86_nops[5];
+		code = nop;
 
-	return code;
+	return (struct jump_label_patch){.code = code, .size = size};
 }
 
 static inline void __jump_label_transform(struct jump_entry *entry,
 					  enum jump_label_type type,
 					  int init)
 {
-	const void *opcode = __jump_label_set_jump_code(entry, type);
+	const struct jump_label_patch jlp = __jump_label_patch(entry, type);
 
 	/*
 	 * As long as only a single processor is running and the code is still
@@ -74,12 +93,11 @@ static inline void __jump_label_transform(struct jump_entry *entry,
 	 * always nop being the 'currently valid' instruction
 	 */
 	if (init || system_state == SYSTEM_BOOTING) {
-		text_poke_early((void *)jump_entry_code(entry), opcode,
-				JUMP_LABEL_NOP_SIZE);
+		text_poke_early((void *)jump_entry_code(entry), jlp.code, jlp.size);
 		return;
 	}
 
-	text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
+	text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
 }
 
 static void __ref jump_label_transform(struct jump_entry *entry,
@@ -100,7 +118,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 bool arch_jump_label_transform_queue(struct jump_entry *entry,
 				     enum jump_label_type type)
 {
-	const void *opcode;
+	struct jump_label_patch jlp;
 
 	if (system_state == SYSTEM_BOOTING) {
 		/*
@@ -111,9 +129,8 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
 	}
 
 	mutex_lock(&text_mutex);
-	opcode = __jump_label_set_jump_code(entry, type);
-	text_poke_queue((void *)jump_entry_code(entry),
-			opcode, JUMP_LABEL_NOP_SIZE, NULL);
+	jlp = __jump_label_patch(entry, type);
+	text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
 	mutex_unlock(&text_mutex);
 	return true;
 }

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label, x86: Introduce jump_entry_size()
  2021-05-06 19:33 ` [PATCH 06/13] jump_label, x86: Introduce jump_entry_size() Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     fa5e5dc39669b4427830c546ede8709323b8276c
Gitweb:        https://git.kernel.org/tip/fa5e5dc39669b4427830c546ede8709323b8276c
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:33:58 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:55 +02:00

jump_label, x86: Introduce jump_entry_size()

This allows architectures to have variable sized jumps.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.786777050@infradead.org
---
 arch/x86/include/asm/jump_label.h |  4 ++--
 arch/x86/kernel/jump_label.c      |  7 +++++++
 include/linux/jump_label.h        |  9 +++++++++
 kernel/jump_label.c               |  2 +-
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index dfdc2b1..d85802a 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,8 +4,6 @@
 
 #define HAVE_JUMP_LABEL_BATCH
 
-#define JUMP_LABEL_NOP_SIZE 5
-
 #include <asm/asm.h>
 #include <asm/nops.h>
 
@@ -47,6 +45,8 @@ l_yes:
 	return true;
 }
 
+extern int arch_jump_entry_size(struct jump_entry *entry);
+
 #endif	/* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 638d3b9..a29eecc 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,6 +16,13 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
 
+#define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
+
+int arch_jump_entry_size(struct jump_entry *entry)
+{
+	return JMP32_INSN_SIZE;
+}
+
 static const void *
 __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
 {
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 05f5554..8c45f58 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -176,6 +176,15 @@ static inline void jump_entry_set_init(struct jump_entry *entry)
 	entry->key |= 2;
 }
 
+static inline int jump_entry_size(struct jump_entry *entry)
+{
+#ifdef JUMP_LABEL_NOP_SIZE
+	return JUMP_LABEL_NOP_SIZE;
+#else
+	return arch_jump_entry_size(entry);
+#endif
+}
+
 #endif
 #endif
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index ba39fbb..521cafc 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -309,7 +309,7 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit);
 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 {
 	if (jump_entry_code(entry) <= (unsigned long)end &&
-	    jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
+	    jump_entry_code(entry) + jump_entry_size(entry) > (unsigned long)start)
 		return 1;
 
 	return 0;

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label, x86: Improve error when we fail expected text
  2021-05-06 19:33 ` [PATCH 05/13] jump_label, x86: Improve error when we fail expected text Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     f9510fa9caaf8229381d5f86ba0774bf1a6ca39b
Gitweb:        https://git.kernel.org/tip/f9510fa9caaf8229381d5f86ba0774bf1a6ca39b
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:33:57 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:55 +02:00

jump_label, x86: Improve error when we fail expected text

There is only a single usage site left, remove the function and extend
the print to include more information, like the expected text and the
patch type.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.726939027@infradead.org
---
 arch/x86/kernel/jump_label.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 6a2eb62..638d3b9 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,37 +16,32 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
 
-static void bug_at(const void *ip, int line)
-{
-	/*
-	 * The location is not an op that we were expecting.
-	 * Something went wrong. Crash the box, as something could be
-	 * corrupting the kernel.
-	 */
-	pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line);
-	BUG();
-}
-
 static const void *
 __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
 {
 	const void *expect, *code;
 	const void *addr, *dest;
-	int line;
 
 	addr = (void *)jump_entry_code(entry);
 	dest = (void *)jump_entry_target(entry);
 
 	code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
 
-	if (type == JUMP_LABEL_JMP) {
-		expect = x86_nops[5]; line = __LINE__;
-	} else {
-		expect = code; line = __LINE__;
-	}
+	if (type == JUMP_LABEL_JMP)
+		expect = x86_nops[5];
+	else
+		expect = code;
 
-	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE))
-		bug_at(addr, line);
+	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) {
+		/*
+		 * The location is not an op that we were expecting.
+		 * Something went wrong. Crash the box, as something could be
+		 * corrupting the kernel.
+		 */
+		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) type:%d\n",
+				addr, addr, addr, expect, type);
+		BUG();
+	}
 
 	if (type == JUMP_LABEL_NOP)
 		code = x86_nops[5];

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label, x86: Factor out the __jump_table generation
  2021-05-06 19:33 ` [PATCH 04/13] jump_label, x86: Factor out the __jump_table generation Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     e1aa35c4c4bc71e44dabc9d7d167b807edd7b439
Gitweb:        https://git.kernel.org/tip/e1aa35c4c4bc71e44dabc9d7d167b807edd7b439
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:33:56 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:55 +02:00

jump_label, x86: Factor out the __jump_table generation

Both arch_static_branch() and arch_static_branch_jump() have the same
blurb to generate the __jump_table entry, share it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.663132781@infradead.org
---
 arch/x86/include/asm/jump_label.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 01de21e..dfdc2b1 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -14,15 +14,19 @@
 #include <linux/stringify.h>
 #include <linux/types.h>
 
+#define JUMP_TABLE_ENTRY				\
+	".pushsection __jump_table,  \"aw\" \n\t"	\
+	_ASM_ALIGN "\n\t"				\
+	".long 1b - . \n\t"				\
+	".long %l[l_yes] - . \n\t"			\
+	_ASM_PTR "%c0 + %c1 - .\n\t"			\
+	".popsection \n\t"
+
 static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte " __stringify(BYTES_NOP5) "\n\t"
-		".pushsection __jump_table,  \"aw\" \n\t"
-		_ASM_ALIGN "\n\t"
-		".long 1b - ., %l[l_yes] - . \n\t"
-		_ASM_PTR "%c0 + %c1 - .\n\t"
-		".popsection \n\t"
+		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;
@@ -33,13 +37,9 @@ l_yes:
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
-		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
-		"2:\n\t"
-		".pushsection __jump_table,  \"aw\" \n\t"
-		_ASM_ALIGN "\n\t"
-		".long 1b - ., %l[l_yes] - . \n\t"
-		_ASM_PTR "%c0 + %c1 - .\n\t"
-		".popsection \n\t"
+		".byte 0xe9 \n\t"
+		".long %l[l_yes] - (. + 4) \n\t"
+		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label, x86: Strip ASM jump_label support
  2021-05-06 19:33 ` [PATCH 03/13] jump_label, x86: Strip ASM jump_label support Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     8bfafcdccb52e770695b12530b1f800fe98b16b1
Gitweb:        https://git.kernel.org/tip/8bfafcdccb52e770695b12530b1f800fe98b16b1
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:33:55 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:55 +02:00

jump_label, x86: Strip ASM jump_label support

In preparation for variable size jump_label support; remove all ASM
bits, which are currently unused.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.599716762@infradead.org
---
 arch/x86/include/asm/jump_label.h | 36 +------------------------------
 1 file changed, 36 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 610a053..01de21e 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -47,42 +47,6 @@ l_yes:
 	return true;
 }
 
-#else	/* __ASSEMBLY__ */
-
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
-	.if \def
-	/* Equivalent to "jmp.d32 \target" */
-	.byte		0xe9
-	.long		\target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-	.else
-	.byte		BYTES_NOP5
-	.endif
-	.pushsection __jump_table, "aw"
-	_ASM_ALIGN
-	.long		.Lstatic_jump_\@ - ., \target - .
-	_ASM_PTR	\key - .
-	.popsection
-.endm
-
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
-	.if \def
-	.byte		BYTES_NOP5
-	.else
-	/* Equivalent to "jmp.d32 \target" */
-	.byte		0xe9
-	.long		\target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-	.endif
-	.pushsection __jump_table, "aw"
-	_ASM_ALIGN
-	.long		.Lstatic_jump_\@ - ., \target - .
-	_ASM_PTR	\key + 1 - .
-	.popsection
-.endm
-
 #endif	/* __ASSEMBLY__ */
 
 #endif

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] x86, objtool: Dont exclude arch/x86/realmode/
  2021-05-06 19:33 ` [PATCH 02/13] x86,objtool: Dont exclude arch/x86/realmode/ Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     80870e6ece78ce67b91398db88fb6b92a178f574
Gitweb:        https://git.kernel.org/tip/80870e6ece78ce67b91398db88fb6b92a178f574
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:33:54 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:54 +02:00

x86, objtool: Dont exclude arch/x86/realmode/

Specifically, init.c uses jump_labels.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.516200011@infradead.org
---
 arch/x86/realmode/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile
index 6b1f3a4..a0b491a 100644
--- a/arch/x86/realmode/Makefile
+++ b/arch/x86/realmode/Makefile
@@ -10,7 +10,6 @@
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
-OBJECT_FILES_NON_STANDARD	:= y
 
 subdir- := rm
 

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [tip: objtool/core] objtool: Rewrite hashtable sizing
  2021-05-06 19:33 ` [PATCH 01/13] objtool: Rewrite hashtable sizing Peter Zijlstra
  2021-05-12 10:41   ` Peter Zijlstra
@ 2021-05-12 13:19   ` tip-bot2 for Peter Zijlstra
  2021-06-10 18:14   ` [PATCH 01/13] " Nathan Chancellor
  2 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-12 13:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     25cf0d8aa2a3440ed32bf1f8df1310d6baf3f1e8
Gitweb:        https://git.kernel.org/tip/25cf0d8aa2a3440ed32bf1f8df1310d6baf3f1e8
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 06 May 2021 21:33:53 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 12 May 2021 14:54:50 +02:00

objtool: Rewrite hashtable sizing

Currently objtool has 5 hashtables and sizes them 16 or 20 bits
depending on the --vmlinux argument.

However, a single size doesn't really work well for the 5 tables,
which among them, cover 3 different uses. Also, while vmlinux is
larger, there is still a very wide difference between a defconfig and
allyesconfig build, which again isn't optimally covered by a single
size.

Another aspect is the cost of elf_hash_init(), which for large tables
dominates the runtime for small input files. It turns out that all it
does is assign NULL, something that is required when using malloc().
However, when we allocate memory using mmap(), we're guaranteed to get
zero filled pages.

Therefore, rewrite the whole thing to:

 1) use more dynamic sized tables, depending on the input file,
 2) avoid the need for elf_hash_init() entirely by using mmap().

This speeds up a regular kernel build (100s to 98s for
x86_64-defconfig), and potentially dramatically speeds up vmlinux
processing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.452881700@infradead.org
---
 tools/objtool/elf.c                 | 113 ++++++++++++++++-----------
 tools/objtool/include/objtool/elf.h |  17 ++--
 2 files changed, 83 insertions(+), 47 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d08f5f3..a8a0ee2 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -9,6 +9,7 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -27,21 +28,27 @@ static inline u32 str_hash(const char *str)
 	return jhash(str, strlen(str), 0);
 }
 
-static inline int elf_hash_bits(void)
-{
-	return vmlinux ? ELF_HASH_BITS : 16;
-}
+#define __elf_table(name)	(elf->name##_hash)
+#define __elf_bits(name)	(elf->name##_bits)
 
-#define elf_hash_add(hashtable, node, key) \
-	hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+#define elf_hash_add(name, node, key) \
+	hlist_add_head(node, &__elf_table(name)[hash_min(key, __elf_bits(name))])
 
-static void elf_hash_init(struct hlist_head *table)
-{
-	__hash_init(table, 1U << elf_hash_bits());
-}
+#define elf_hash_for_each_possible(name, obj, member, key) \
+	hlist_for_each_entry(obj, &__elf_table(name)[hash_min(key, __elf_bits(name))], member)
 
-#define elf_hash_for_each_possible(name, obj, member, key)			\
-	hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+#define elf_alloc_hash(name, size) \
+({ \
+	__elf_bits(name) = max(10, ilog2(size)); \
+	__elf_table(name) = mmap(NULL, sizeof(struct hlist_head) << __elf_bits(name), \
+				 PROT_READ|PROT_WRITE, \
+				 MAP_PRIVATE|MAP_ANON, -1, 0); \
+	if (__elf_table(name) == (void *)-1L) { \
+		WARN("mmap fail " #name); \
+		__elf_table(name) = NULL; \
+	} \
+	__elf_table(name); \
+})
 
 static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b)
 {
@@ -80,9 +87,10 @@ struct section *find_section_by_name(const struct elf *elf, const char *name)
 {
 	struct section *sec;
 
-	elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+	elf_hash_for_each_possible(section_name, sec, name_hash, str_hash(name)) {
 		if (!strcmp(sec->name, name))
 			return sec;
+	}
 
 	return NULL;
 }
@@ -92,9 +100,10 @@ static struct section *find_section_by_index(struct elf *elf,
 {
 	struct section *sec;
 
-	elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
+	elf_hash_for_each_possible(section, sec, hash, idx) {
 		if (sec->idx == idx)
 			return sec;
+	}
 
 	return NULL;
 }
@@ -103,9 +112,10 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
 {
 	struct symbol *sym;
 
-	elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+	elf_hash_for_each_possible(symbol, sym, hash, idx) {
 		if (sym->idx == idx)
 			return sym;
+	}
 
 	return NULL;
 }
@@ -170,9 +180,10 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
 {
 	struct symbol *sym;
 
-	elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+	elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
 		if (!strcmp(sym->name, name))
 			return sym;
+	}
 
 	return NULL;
 }
@@ -189,8 +200,8 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se
 	sec = sec->reloc;
 
 	for_offset_range(o, offset, offset + len) {
-		elf_hash_for_each_possible(elf->reloc_hash, reloc, hash,
-				       sec_offset_hash(sec, o)) {
+		elf_hash_for_each_possible(reloc, reloc, hash,
+					   sec_offset_hash(sec, o)) {
 			if (reloc->sec != sec)
 				continue;
 
@@ -228,6 +239,10 @@ static int read_sections(struct elf *elf)
 		return -1;
 	}
 
+	if (!elf_alloc_hash(section, sections_nr) ||
+	    !elf_alloc_hash(section_name, sections_nr))
+		return -1;
+
 	for (i = 0; i < sections_nr; i++) {
 		sec = malloc(sizeof(*sec));
 		if (!sec) {
@@ -274,12 +289,14 @@ static int read_sections(struct elf *elf)
 		sec->len = sec->sh.sh_size;
 
 		list_add_tail(&sec->list, &elf->sections);
-		elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
-		elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+		elf_hash_add(section, &sec->hash, sec->idx);
+		elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
 	}
 
-	if (stats)
+	if (stats) {
 		printf("nr_sections: %lu\n", (unsigned long)sections_nr);
+		printf("section_bits: %d\n", elf->section_bits);
+	}
 
 	/* sanity check, one more call to elf_nextscn() should return NULL */
 	if (elf_nextscn(elf->elf, s)) {
@@ -308,8 +325,8 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
 	else
 		entry = &sym->sec->symbol_list;
 	list_add(&sym->list, entry);
-	elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
-	elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+	elf_hash_add(symbol, &sym->hash, sym->idx);
+	elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name));
 
 	/*
 	 * Don't store empty STT_NOTYPE symbols in the rbtree.  They
@@ -329,19 +346,25 @@ static int read_symbols(struct elf *elf)
 	Elf32_Word shndx;
 
 	symtab = find_section_by_name(elf, ".symtab");
-	if (!symtab) {
+	if (symtab) {
+		symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+		if (symtab_shndx)
+			shndx_data = symtab_shndx->data;
+
+		symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+	} else {
 		/*
 		 * A missing symbol table is actually possible if it's an empty
-		 * .o file.  This can happen for thunk_64.o.
+		 * .o file. This can happen for thunk_64.o. Make sure to at
+		 * least allocate the symbol hash tables so we can do symbol
+		 * lookups without crashing.
 		 */
-		return 0;
+		symbols_nr = 0;
 	}
 
-	symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
-	if (symtab_shndx)
-		shndx_data = symtab_shndx->data;
-
-	symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+	if (!elf_alloc_hash(symbol, symbols_nr) ||
+	    !elf_alloc_hash(symbol_name, symbols_nr))
+		return -1;
 
 	for (i = 0; i < symbols_nr; i++) {
 		sym = malloc(sizeof(*sym));
@@ -389,8 +412,10 @@ static int read_symbols(struct elf *elf)
 		elf_add_symbol(elf, sym);
 	}
 
-	if (stats)
+	if (stats) {
 		printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
+		printf("symbol_bits: %d\n", elf->symbol_bits);
+	}
 
 	/* Create parent/child links for any cold subfunctions */
 	list_for_each_entry(sec, &elf->sections, list) {
@@ -479,7 +504,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
 	reloc->addend = addend;
 
 	list_add_tail(&reloc->list, &sec->reloc->reloc_list);
-	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+	elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
 
 	sec->reloc->changed = true;
 
@@ -556,6 +581,15 @@ static int read_relocs(struct elf *elf)
 	unsigned int symndx;
 	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
 
+	sec = find_section_by_name(elf, ".text");
+	if (!sec) {
+		WARN("no .text");
+		return -1;
+	}
+
+	if (!elf_alloc_hash(reloc, sec->len / 16))
+		return -1;
+
 	list_for_each_entry(sec, &elf->sections, list) {
 		if ((sec->sh.sh_type != SHT_RELA) &&
 		    (sec->sh.sh_type != SHT_REL))
@@ -600,7 +634,7 @@ static int read_relocs(struct elf *elf)
 			}
 
 			list_add_tail(&reloc->list, &sec->reloc_list);
-			elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+			elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
 
 			nr_reloc++;
 		}
@@ -611,6 +645,7 @@ static int read_relocs(struct elf *elf)
 	if (stats) {
 		printf("max_reloc: %lu\n", max_reloc);
 		printf("tot_reloc: %lu\n", tot_reloc);
+		printf("reloc_bits: %d\n", elf->reloc_bits);
 	}
 
 	return 0;
@@ -632,12 +667,6 @@ struct elf *elf_open_read(const char *name, int flags)
 
 	INIT_LIST_HEAD(&elf->sections);
 
-	elf_hash_init(elf->symbol_hash);
-	elf_hash_init(elf->symbol_name_hash);
-	elf_hash_init(elf->section_hash);
-	elf_hash_init(elf->section_name_hash);
-	elf_hash_init(elf->reloc_hash);
-
 	elf->fd = open(name, flags);
 	if (elf->fd == -1) {
 		fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -850,8 +879,8 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 		return NULL;
 
 	list_add_tail(&sec->list, &elf->sections);
-	elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
-	elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+	elf_hash_add(section, &sec->hash, sec->idx);
+	elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
 
 	elf->changed = true;
 
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 45e5ede..9008275 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -84,11 +84,18 @@ struct elf {
 	bool changed;
 	char *name;
 	struct list_head sections;
-	DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS);
+
+	int symbol_bits;
+	int symbol_name_bits;
+	int section_bits;
+	int section_name_bits;
+	int reloc_bits;
+
+	struct hlist_head *symbol_hash;
+	struct hlist_head *symbol_name_hash;
+	struct hlist_head *section_hash;
+	struct hlist_head *section_name_hash;
+	struct hlist_head *reloc_hash;
 };
 
 #define OFFSET_STRIDE_BITS	4

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 12.5/13] objtool: Reflow handle_jump_alt()
  2021-05-06 19:34 ` [PATCH 12/13] objtool: Provide stats for jump_labels Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2021-05-13 14:15   ` Peter Zijlstra
  1 sibling, 0 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-13 14:15 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel


Subject: objtool: Reflow handle_jump_alt()
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu May 13 13:52:46 CEST 2021

Miroslav figured the code flow in handle_jump_alt() was sub-optimal
with that goto. Reflow the code to make it clearer.

Reported-by: Miroslav Benes <mbenes@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c |   22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1225,17 +1225,9 @@ static int handle_jump_alt(struct objtoo
 			   struct instruction *orig_insn,
 			   struct instruction **new_insn)
 {
-	if (orig_insn->type == INSN_NOP) {
-do_nop:
-		if (orig_insn->len == 2)
-			file->jl_nop_short++;
-		else
-			file->jl_nop_long++;
+	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL &&
+	    orig_insn->type != INSN_NOP) {
 
-		return 0;
-	}
-
-	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
 		WARN_FUNC("unsupported instruction at jump label",
 			  orig_insn->sec, orig_insn->offset);
 		return -1;
@@ -1252,7 +1244,15 @@ static int handle_jump_alt(struct objtoo
 			       orig_insn->offset, orig_insn->len,
 			       arch_nop_insn(orig_insn->len));
 		orig_insn->type = INSN_NOP;
-		goto do_nop;
+	}
+
+	if (orig_insn->type == INSN_NOP) {
+		if (orig_insn->len == 2)
+			file->jl_nop_short++;
+		else
+			file->jl_nop_long++;
+
+		return 0;
 	}
 
 	if (orig_insn->len == 2)

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 07.5/13] jump_label,x86: Remove unused JUMP_LABEL_NOP_SIZE
  2021-05-06 19:33 ` [PATCH 07/13] jump_label, x86: Add variable length patching support Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2021-05-13 14:16   ` Peter Zijlstra
  2021-05-14  7:01     ` [tip: objtool/core] jump_label/x86: " tip-bot2 for Peter Zijlstra
  1 sibling, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-13 14:16 UTC (permalink / raw)
  To: x86, jpoimboe, jbaron, rostedt, ardb; +Cc: linux-kernel


Subject: jump_label,x86: Remove unused JUMP_LABEL_NOP_SIZE
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu May 13 13:53:16 CEST 2021

JUMP_LABEL_NOP_SIZE is now unused, remove it.

Fixes: 001951bea748 ("jump_label, x86: Add variable length patching support")
Reported-by: Miroslav Benes <mbenes@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/jump_label.c |    2 --
 1 file changed, 2 deletions(-)

--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -17,8 +17,6 @@
 #include <asm/text-patching.h>
 #include <asm/insn.h>
 
-#define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
-
 int arch_jump_entry_size(struct jump_entry *entry)
 {
 	struct insn insn = {};

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [tip: objtool/core] jump_label/x86: Remove unused JUMP_LABEL_NOP_SIZE
  2021-05-13 14:16   ` [PATCH 07.5/13] jump_label,x86: Remove unused JUMP_LABEL_NOP_SIZE Peter Zijlstra
@ 2021-05-14  7:01     ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-05-14  7:01 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Miroslav Benes, Peter Zijlstra (Intel), Ingo Molnar, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     d46f61b20b060f03b58fde170ee618f17dc6f99d
Gitweb:        https://git.kernel.org/tip/d46f61b20b060f03b58fde170ee618f17dc6f99d
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Thu, 13 May 2021 16:16:47 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Fri, 14 May 2021 09:00:09 +02:00

jump_label/x86: Remove unused JUMP_LABEL_NOP_SIZE

JUMP_LABEL_NOP_SIZE is now unused, remove it.

Fixes: 001951bea748 ("jump_label, x86: Add variable length patching support")
Reported-by: Miroslav Benes <mbenes@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/YJ00zxsvocDV5vLU@hirez.programming.kicks-ass.net
---
 arch/x86/kernel/jump_label.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index a762dc1..674906f 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -17,8 +17,6 @@
 #include <asm/text-patching.h>
 #include <asm/insn.h>
 
-#define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
-
 int arch_jump_entry_size(struct jump_entry *entry)
 {
 	struct insn insn = {};

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-05-12 13:19   ` [tip: objtool/core] jump_label, x86: " tip-bot2 for Peter Zijlstra
@ 2021-05-18 19:50     ` Peter Zijlstra
  2021-05-18 20:24       ` Peter Zijlstra
  0 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-18 19:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-tip-commits, Ingo Molnar, x86, willy

On Wed, May 12, 2021 at 01:19:47PM -0000, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the objtool/core branch of tip:
> 
> Commit-ID:     ab3257042c26d0cd44793c741e2f89bf38b21fe8
> Gitweb:        https://git.kernel.org/tip/ab3257042c26d0cd44793c741e2f89bf38b21fe8
> Author:        Peter Zijlstra <peterz@infradead.org>
> AuthorDate:    Thu, 06 May 2021 21:34:05 +02:00
> Committer:     Ingo Molnar <mingo@kernel.org>
> CommitterDate: Wed, 12 May 2021 14:54:56 +02:00
> 
> jump_label, x86: Allow short NOPs
> 
> Now that objtool is able to rewrite jump_label instructions, have the
> compiler emit a JMP, such that it can decide on the optimal encoding,
> and set jump_entry::key bit1 to indicate that objtool should rewrite
> the instruction to a matching NOP.
> 
> For x86_64-allyesconfig this gives:
> 
>   jl\     NOP     JMP
>   short:  22997   124
>   long:   30874   90
> 
> IOW, we save (22997+124) * 3 bytes of kernel text in hotpaths.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Ingo Molnar <mingo@kernel.org>
> Link: https://lore.kernel.org/r/20210506194158.216763632@infradead.org

So Willy is having some trouble with this commit; for some reason his
kernel is no longer booting in his qemu thing, but I can't reproduce.

I've hacked up the below vmlinux.o validation, willy can you run this on
your vmlinux.o, something like:

	build/tools/objtool/objtool check -abdJsuld build/vmlinux.o

Where I'm assuming you build with O=build/. When I run it on my build
(with your .config) I get absolutely nothing :/

Alternatively, can you get me your vmlinux.o + bzImage ?

Also helpful might be trying to attach gdb to the qemu gdbstub and
looking where the boot fails.

---

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 8b38b5d6fec7..100f3efa6136 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -20,7 +20,7 @@
 #include <objtool/objtool.h>
 
 bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
-     validate_dup, vmlinux, mcount, noinstr, backup;
+     validate_dup, vmlinux, mcount, noinstr, backup, validate_jl;
 
 static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
@@ -45,6 +45,7 @@ const struct option check_options[] = {
 	OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
 	OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
 	OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
+	OPT_BOOLEAN('J', "jump-label", &validate_jl, "validate jump-label tables"),
 	OPT_END(),
 };
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 2c6a93edf27e..c3c82e40cbee 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1225,6 +1225,33 @@ static int handle_jump_alt(struct objtool_file *file,
 			   struct instruction *orig_insn,
 			   struct instruction **new_insn)
 {
+	if (validate_jl) {
+#if 0
+		if (special_alt->key_addend & 2) {
+			WARN_FUNC("jump-label mod: %s", orig_insn->sec, orig_insn->offset,
+				  orig_insn->type == INSN_NOP ? "nop" : "jmp");
+		}
+#endif
+
+		if (orig_insn->len == 2) {
+			s32 disp;
+
+			if (special_alt->orig_sec != special_alt->new_sec) {
+				WARN_FUNC("short jump-label cannot cross sections",
+					  orig_insn->sec, orig_insn->offset);
+				return -1;
+			}
+
+			disp = special_alt->new_off - (special_alt->orig_off + 2);
+
+			if ((disp >> 31) != (disp >> 7)) {
+				WARN_FUNC("short jump-label, displacement too large: 0x%08x",
+					  orig_insn->sec, orig_insn->offset, disp);
+				return -1;
+			}
+		}
+	}
+
 	if (orig_insn->type == INSN_NOP) {
 do_nop:
 		if (orig_insn->len == 2)
@@ -1244,6 +1271,11 @@ static int handle_jump_alt(struct objtool_file *file,
 	if (special_alt->key_addend & 2) {
 		struct reloc *reloc = insn_reloc(file, orig_insn);
 
+		if (validate_jl) {
+			WARN_FUNC("jump-label unpatched", orig_insn->sec, orig_insn->offset);
+			return -1;
+		}
+
 		if (reloc) {
 			reloc->type = R_NONE;
 			elf_write_reloc(file->elf, reloc);
@@ -1341,6 +1373,8 @@ static int add_special_section_alts(struct objtool_file *file)
 	}
 
 	if (stats) {
+		if (validate_jl)
+			printf("validate-");
 		printf("jl\\\tNOP\tJMP\n");
 		printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
 		printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 15ac0b7d3d6a..c9a00423ebd5 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -9,7 +9,7 @@
 
 extern const struct option check_options[];
 extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
-            validate_dup, vmlinux, mcount, noinstr, backup;
+            validate_dup, vmlinux, mcount, noinstr, backup, validate_jl;
 
 extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
 

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-05-18 19:50     ` Peter Zijlstra
@ 2021-05-18 20:24       ` Peter Zijlstra
  2021-05-19  0:44         ` Josh Poimboeuf
  0 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-18 20:24 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-tip-commits, Ingo Molnar, x86, willy, masahiroy,
	michal.lkml, Josh Poimboeuf


+kbuild maintainers

On Tue, May 18, 2021 at 09:50:04PM +0200, Peter Zijlstra wrote:
> On Wed, May 12, 2021 at 01:19:47PM -0000, tip-bot2 for Peter Zijlstra wrote:
> > The following commit has been merged into the objtool/core branch of tip:
> > 
> > Commit-ID:     ab3257042c26d0cd44793c741e2f89bf38b21fe8
> > Gitweb:        https://git.kernel.org/tip/ab3257042c26d0cd44793c741e2f89bf38b21fe8
> > Author:        Peter Zijlstra <peterz@infradead.org>
> > AuthorDate:    Thu, 06 May 2021 21:34:05 +02:00
> > Committer:     Ingo Molnar <mingo@kernel.org>
> > CommitterDate: Wed, 12 May 2021 14:54:56 +02:00
> > 
> > jump_label, x86: Allow short NOPs
> > 
> > Now that objtool is able to rewrite jump_label instructions, have the
> > compiler emit a JMP, such that it can decide on the optimal encoding,
> > and set jump_entry::key bit1 to indicate that objtool should rewrite
> > the instruction to a matching NOP.
> > 
> > For x86_64-allyesconfig this gives:
> > 
> >   jl\     NOP     JMP
> >   short:  22997   124
> >   long:   30874   90
> > 
> > IOW, we save (22997+124) * 3 bytes of kernel text in hotpaths.
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Signed-off-by: Ingo Molnar <mingo@kernel.org>
> > Link: https://lore.kernel.org/r/20210506194158.216763632@infradead.org
> 
> So Willy is having some trouble with this commit; for some reason his
> kernel is no longer booting in his qemu thing, but I can't reproduce.
> 
> I've hacked up the below vmlinux.o validation, willy can you run this on
> your vmlinux.o, something like:
> 
> 	build/tools/objtool/objtool check -abdJsuld build/vmlinux.o
> 
> Where I'm assuming you build with O=build/. When I run it on my build
> (with your .config) I get absolutely nothing :/
> 
> Alternatively, can you get me your vmlinux.o + bzImage ?
> 
> Also helpful might be trying to attach gdb to the qemu gdbstub and
> looking where the boot fails.

OK, willy followed up on IRC, and it turns out there's a kbuild
dependency missing; when objtool changes we don't rebuild:

  arch/x86/entry/vdso/vma.o

even though we should, this led to an unpatched 2 byte jump-label and
things went sideways. I'm not sure I understand the whole build
machinery well enough to know where to begin chasing this.

Now, this file is mighty magical, due to:

arch/x86/entry/vdso/Makefile:OBJECT_FILES_NON_STANDARD  := y
arch/x86/entry/vdso/Makefile:OBJECT_FILES_NON_STANDARD_vma.o    := n

Maybe that's related.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-05-18 20:24       ` Peter Zijlstra
@ 2021-05-19  0:44         ` Josh Poimboeuf
  2021-05-19  6:56           ` Peter Zijlstra
  0 siblings, 1 reply; 48+ messages in thread
From: Josh Poimboeuf @ 2021-05-19  0:44 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, linux-tip-commits, Ingo Molnar, x86, willy,
	masahiroy, michal.lkml

On Tue, May 18, 2021 at 10:24:43PM +0200, Peter Zijlstra wrote:
> OK, willy followed up on IRC, and it turns out there's a kbuild
> dependency missing; when objtool changes we don't rebuild:
> 
>   arch/x86/entry/vdso/vma.o
> 
> even though we should, this led to an unpatched 2 byte jump-label and
> things went sideways. I'm not sure I understand the whole build
> machinery well enough to know where to begin chasing this.
> 
> Now, this file is mighty magical, due to:
> 
> arch/x86/entry/vdso/Makefile:OBJECT_FILES_NON_STANDARD  := y
> arch/x86/entry/vdso/Makefile:OBJECT_FILES_NON_STANDARD_vma.o    := n
> 
> Maybe that's related.

I'm not exactly thrilled that objtool now has the power to easily brick
a system :-/  Is it really worth it?

Anyway, here's one way to fix it.  Maybe Masahiro has a better idea.

From f88b208677953bc445db08ac46b6e4259217bb8a Mon Sep 17 00:00:00 2001
Message-Id: <f88b208677953bc445db08ac46b6e4259217bb8a.1621384807.git.jpoimboe@redhat.com>
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 18 May 2021 18:59:15 -0500
Subject: [PATCH] kbuild: Fix objtool dependency for
 'OBJECT_FILES_NON_STANDARD_<obj> := n'

"OBJECT_FILES_NON_STANDARD_vma.o := n" has a dependency bug.  When
objtool source is updated, the affected object doesn't get re-analyzed
by objtool.

Peter's new variable-sized jump label feature relies on objtool
rewriting the object file.  Otherwise the system can fail to boot.  That
effectively upgrades this minor dependency issue to a major bug.

The problem is that variables in prerequisites are expanded early,
during the read-in phase.  The '$(objtool_dep)' variable indirectly uses
'$@', which isn't yet available when the target prerequisites are
evaluated.

Use '.SECONDEXPANSION:' which causes '$(objtool_dep)' to be expanded in
a later phase, after the target-specific '$@' variable has been defined.

Fixes: b9ab5ebb14ec ("objtool: Add CONFIG_STACK_VALIDATION option")
Fixes: ab3257042c26 ("jump_label, x86: Allow short NOPs")
Reported-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 scripts/Makefile.build | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 949f723efe53..34d257653fb4 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -268,7 +268,8 @@ define rule_as_o_S
 endef
 
 # Built-in and composite module parts
-$(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
+.SECONDEXPANSION:
+$(obj)/%.o: $(src)/%.c $(recordmcount_source) $$(objtool_dep) FORCE
 	$(call if_changed_rule,cc_o_c)
 	$(call cmd,force_checksrc)
 
@@ -349,7 +350,7 @@ cmd_modversions_S =								\
 	fi
 endif
 
-$(obj)/%.o: $(src)/%.S $(objtool_dep) FORCE
+$(obj)/%.o: $(src)/%.S $$(objtool_dep) FORCE
 	$(call if_changed_rule,as_o_S)
 
 targets += $(filter-out $(subdir-builtin), $(real-obj-y))
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-05-19  0:44         ` Josh Poimboeuf
@ 2021-05-19  6:56           ` Peter Zijlstra
  2021-06-29 20:00             ` Matthew Wilcox
  0 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-05-19  6:56 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: linux-kernel, linux-tip-commits, Ingo Molnar, x86, willy,
	masahiroy, michal.lkml

On Tue, May 18, 2021 at 07:44:11PM -0500, Josh Poimboeuf wrote:

> I'm not exactly thrilled that objtool now has the power to easily brick
> a system :-/  Is it really worth it?

The way I look at it is that not running objtool is a bug either way,
bricking a system is of course a somewhat more drastic failure mode than
missing ORC info for example, but neither are good.

As to worth, about half the jump labels are shorter now, this reduces I$
pressure on hot paths. Any little thing to offset the ever increasing
bulk seems like a good thing to me. But yes, it would be nice if the
assemblers wouldn't suck so bad and this wouldn't need objtool :/ But
I've tried poking the tools guys and they don't really seem interested
:-(

Also, only dirty builds are affected here; clean builds (always
recommended afaik, because dep trouble isn't unheard of) are fine.

> Anyway, here's one way to fix it.  Maybe Masahiro has a better idea.

Thanks! lemme go read up on this magic :-)

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-05-06 19:33 ` [PATCH 01/13] objtool: Rewrite hashtable sizing Peter Zijlstra
  2021-05-12 10:41   ` Peter Zijlstra
  2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2021-06-10 18:14   ` Nathan Chancellor
  2021-06-10 18:43     ` Peter Zijlstra
  2021-06-10 18:50     ` Sami Tolvanen
  2 siblings, 2 replies; 48+ messages in thread
From: Nathan Chancellor @ 2021-06-10 18:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: x86, jpoimboe, jbaron, rostedt, ardb, linux-kernel, samitolvanen,
	ndesaulniers, clang-built-linux

Hi Peter,

On Thu, May 06, 2021 at 09:33:53PM +0200, Peter Zijlstra wrote:
> Currently objtool has 5 hashtables and sizes them 16 or 20 bits
> depending on the --vmlinux argument.
> 
> However, a single size doesn't really work well for the 5 tables,
> which among them, cover 3 different uses. Also, while vmlinux is
> larger, there is still a very wide difference between a defconfig and
> allyesconfig build, which again isn't optimally covered by a single
> size.
> 
> Another aspect is the cost of elf_hash_init(), which for large tables
> dominates the runtime for small input files. It turns out that all it
> does is assign NULL, something that is required when using malloc().
> However, when we allocate memory using mmap(), we're guaranteed to get
> zero filled pages.
> 
> Therefore, rewrite the whole thing to:
> 
>  1) use more dynamic sized tables, depending on the input file,
>  2) avoid the need for elf_hash_init() entirely by using mmap().
> 
> This speeds up a regular kernel build (100s to 98s for
> x86_64-defconfig), and potentially dramatically speeds up vmlinux
> processing.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

This patch as commit 25cf0d8aa2a3 ("objtool: Rewrite hashtable sizing")
in -tip causes a massive compile time regression with allmodconfig +
ThinLTO.

At v5.13-rc1, the performance penalty is only about 23%, as measured with
hyperfine for two runs [1]:

Benchmark #1: allmodconfig
  Time (mean ± σ):     625.173 s ±  2.198 s    [User: 35120.895 s, System: 2176.868 s]
  Range (min … max):   623.619 s … 626.727 s    2 runs

Benchmark #2: allmodconfig with ThinLTO
  Time (mean ± σ):     771.034 s ±  0.369 s    [User: 39706.084 s, System: 2326.166 s]
  Range (min … max):   770.773 s … 771.295 s    2 runs

Summary
  'allmodconfig' ran
    1.23 ± 0.00 times faster than 'allmodconfig with ThinLTO'

However, at 25cf0d8aa2a3, it is almost 150% on a 64-core server.

Benchmark #1: allmodconfig
  Time (mean ± σ):     624.759 s ±  2.153 s    [User: 35114.379 s, System: 2145.456 s]
  Range (min … max):   623.237 s … 626.281 s    2 runs

Benchmark #2: allmodconfig with ThinLTO
  Time (mean ± σ):     1555.377 s ± 12.806 s    [User: 40558.463 s, System: 2310.139 s]
  Range (min … max):   1546.321 s … 1564.432 s    2 runs

Summary
  'allmodconfig' ran
    2.49 ± 0.02 times faster than 'allmodconfig with ThinLTO'

Adding Sami because I am not sure why this patch would have much of an impact
in relation to LTO. https://git.kernel.org/tip/25cf0d8aa2a3 is the patch in
question.

If I can provide any further information or help debug, please let me know.

If you are interested in reproducing this locally, you will need a
fairly recent LLVM stack (I used the stable release/12.x branch) and to
cherry-pick commit 976aac5f8829 ("kcsan: Fix debugfs initcall return
type") to fix an unrelated build failure. My script [2] can build a
self-contained toolchain fairly quickly if you cannot get one from your
package manager. A command like below will speed up the build a bit:

$ ./build-llvm.py \
    --branch "release/12.x" \
    --build-stage1-only \
    --install-stage1-only \
    --projects "clang;lld" \
    --targets X86

After adding the "install/bin" directory to PATH:

$ echo "CONFIG_GCOV_KERNEL=n
CONFIG_KASAN=n
CONFIG_LTO_CLANG_THIN=y" >allmod.config

$ make -skj"$(nproc)" LLVM=1 LLVM_IAS=1 allmodconfig all

[1]: https://github.com/sharkdp/hyperfine
[2]: https://github.com/ClangBuiltLinux/tc-build

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-06-10 18:14   ` [PATCH 01/13] " Nathan Chancellor
@ 2021-06-10 18:43     ` Peter Zijlstra
  2021-06-10 18:54       ` Nathan Chancellor
  2021-06-10 18:50     ` Sami Tolvanen
  1 sibling, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-06-10 18:43 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: x86, jpoimboe, jbaron, rostedt, ardb, linux-kernel, samitolvanen,
	ndesaulniers, clang-built-linux

On Thu, Jun 10, 2021 at 11:14:51AM -0700, Nathan Chancellor wrote:

> This patch as commit 25cf0d8aa2a3 ("objtool: Rewrite hashtable sizing")
> in -tip causes a massive compile time regression with allmodconfig +
> ThinLTO.

Moo... the allyesconfig builds I used it on were much faster, but that
was on regular GCC vmlinux.o after linking.

> Adding Sami because I am not sure why this patch would have much of an impact
> in relation to LTO. https://git.kernel.org/tip/25cf0d8aa2a3 is the patch in
> question.
> 
> If I can provide any further information or help debug, please let me know.
> 
> If you are interested in reproducing this locally, you will need a
> fairly recent LLVM stack (I used the stable release/12.x branch) and to
> cherry-pick commit 976aac5f8829 ("kcsan: Fix debugfs initcall return
> type") to fix an unrelated build failure. My script [2] can build a
> self-contained toolchain fairly quickly if you cannot get one from your
> package manager. A command like below will speed up the build a bit:

Would something like llvm-13 from Debian be good enough?

$ clang-13 --version
Debian clang version 13.0.0-++20210418105309+a0898f0cecc7-1~exp1
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-06-10 18:14   ` [PATCH 01/13] " Nathan Chancellor
  2021-06-10 18:43     ` Peter Zijlstra
@ 2021-06-10 18:50     ` Sami Tolvanen
  2021-06-10 19:33       ` Peter Zijlstra
  1 sibling, 1 reply; 48+ messages in thread
From: Sami Tolvanen @ 2021-06-10 18:50 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: Peter Zijlstra, X86 ML, Josh Poimboeuf, Jason Baron,
	Steven Rostedt, Ard Biesheuvel, LKML, Nick Desaulniers,
	clang-built-linux

On Thu, Jun 10, 2021 at 11:14 AM Nathan Chancellor <nathan@kernel.org> wrote:
> Adding Sami because I am not sure why this patch would have much of an impact
> in relation to LTO. https://git.kernel.org/tip/25cf0d8aa2a3 is the patch in
> question.

It's because LLVM enables -ffunction-sections with LTO, so using .text
section size to estimate the reloc hash table size isn't going to be
accurate, as confirmed by objtool output with --stats:

  OBJTOOL vmlinux.o
nr_sections: 141481
section_bits: 17
nr_symbols: 215262
symbol_bits: 17
max_reloc: 24850
tot_reloc: 590890
reloc_bits: 10

Sami

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-06-10 18:43     ` Peter Zijlstra
@ 2021-06-10 18:54       ` Nathan Chancellor
  0 siblings, 0 replies; 48+ messages in thread
From: Nathan Chancellor @ 2021-06-10 18:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: x86, jpoimboe, jbaron, rostedt, ardb, linux-kernel, samitolvanen,
	ndesaulniers, clang-built-linux

On 6/10/2021 11:43 AM, Peter Zijlstra wrote:
> On Thu, Jun 10, 2021 at 11:14:51AM -0700, Nathan Chancellor wrote:
> 
>> This patch as commit 25cf0d8aa2a3 ("objtool: Rewrite hashtable sizing")
>> in -tip causes a massive compile time regression with allmodconfig +
>> ThinLTO.
> 
> Moo... the allyesconfig builds I used it on were much faster, but that
> was on regular GCC vmlinux.o after linking.
> 
>> Adding Sami because I am not sure why this patch would have much of an impact
>> in relation to LTO. https://git.kernel.org/tip/25cf0d8aa2a3 is the patch in
>> question.
>>
>> If I can provide any further information or help debug, please let me know.
>>
>> If you are interested in reproducing this locally, you will need a
>> fairly recent LLVM stack (I used the stable release/12.x branch) and to
>> cherry-pick commit 976aac5f8829 ("kcsan: Fix debugfs initcall return
>> type") to fix an unrelated build failure. My script [2] can build a
>> self-contained toolchain fairly quickly if you cannot get one from your
>> package manager. A command like below will speed up the build a bit:
> 
> Would something like llvm-13 from Debian be good enough?
> 
> $ clang-13 --version
> Debian clang version 13.0.0-++20210418105309+a0898f0cecc7-1~exp1
> Target: x86_64-pc-linux-gnu
> Thread model: posix
> InstalledDir: /usr/bin
> 

Yes, that would work. That is what we use in our CI.

Looks like Sami gave a reply that explains it.

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-06-10 18:50     ` Sami Tolvanen
@ 2021-06-10 19:33       ` Peter Zijlstra
  2021-06-10 19:43         ` Sami Tolvanen
                           ` (2 more replies)
  0 siblings, 3 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-06-10 19:33 UTC (permalink / raw)
  To: Sami Tolvanen
  Cc: Nathan Chancellor, X86 ML, Josh Poimboeuf, Jason Baron,
	Steven Rostedt, Ard Biesheuvel, LKML, Nick Desaulniers,
	clang-built-linux

On Thu, Jun 10, 2021 at 11:50:36AM -0700, Sami Tolvanen wrote:
> On Thu, Jun 10, 2021 at 11:14 AM Nathan Chancellor <nathan@kernel.org> wrote:
> > Adding Sami because I am not sure why this patch would have much of an impact
> > in relation to LTO. https://git.kernel.org/tip/25cf0d8aa2a3 is the patch in
> > question.
> 
> It's because LLVM enables -ffunction-sections with LTO, so using .text
> section size to estimate the reloc hash table size isn't going to be
> accurate, as confirmed by objtool output with --stats:
> 
>   OBJTOOL vmlinux.o
> nr_sections: 141481
> section_bits: 17
> nr_symbols: 215262
> symbol_bits: 17
> max_reloc: 24850
> tot_reloc: 590890
> reloc_bits: 10

Bah. Would something like the *completely* untested below help with that?

---
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 25f6d293bc86..8676c7598728 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -288,6 +288,9 @@ static int read_sections(struct elf *elf)
 		}
 		sec->len = sec->sh.sh_size;
 
+		if (sec->sh.sh_flags & SHF_EXECINSTR)
+			elf->text_size += sec->len;
+
 		list_add_tail(&sec->list, &elf->sections);
 		elf_hash_add(section, &sec->hash, sec->idx);
 		elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
@@ -581,13 +584,7 @@ static int read_relocs(struct elf *elf)
 	unsigned int symndx;
 	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
 
-	sec = find_section_by_name(elf, ".text");
-	if (!sec) {
-		WARN("no .text");
-		return -1;
-	}
-
-	if (!elf_alloc_hash(reloc, sec->len / 16))
+	if (!elf_alloc_hash(reloc, elf->text_size / 16))
 		return -1;
 
 	list_for_each_entry(sec, &elf->sections, list) {
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 90082751f851..e34395047530 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -83,6 +83,7 @@ struct elf {
 	int fd;
 	bool changed;
 	char *name;
+	unsigned int text_size;
 	struct list_head sections;
 
 	int symbol_bits;

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-06-10 19:33       ` Peter Zijlstra
@ 2021-06-10 19:43         ` Sami Tolvanen
  2021-06-10 20:59         ` Nathan Chancellor
  2021-06-14 13:19         ` [tip: objtool/core] objtool: Improve reloc hash size guestimate tip-bot2 for Peter Zijlstra
  2 siblings, 0 replies; 48+ messages in thread
From: Sami Tolvanen @ 2021-06-10 19:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nathan Chancellor, X86 ML, Josh Poimboeuf, Jason Baron,
	Steven Rostedt, Ard Biesheuvel, LKML, Nick Desaulniers,
	clang-built-linux

On Thu, Jun 10, 2021 at 12:33 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Jun 10, 2021 at 11:50:36AM -0700, Sami Tolvanen wrote:
> > On Thu, Jun 10, 2021 at 11:14 AM Nathan Chancellor <nathan@kernel.org> wrote:
> > > Adding Sami because I am not sure why this patch would have much of an impact
> > > in relation to LTO. https://git.kernel.org/tip/25cf0d8aa2a3 is the patch in
> > > question.
> >
> > It's because LLVM enables -ffunction-sections with LTO, so using .text
> > section size to estimate the reloc hash table size isn't going to be
> > accurate, as confirmed by objtool output with --stats:
> >
> >   OBJTOOL vmlinux.o
> > nr_sections: 141481
> > section_bits: 17
> > nr_symbols: 215262
> > symbol_bits: 17
> > max_reloc: 24850
> > tot_reloc: 590890
> > reloc_bits: 10
>
> Bah. Would something like the *completely* untested below help with that?

Yes, that seems to work:

tot_reloc: 590890
reloc_bits: 19

Nathan, can you confirm if this fixes the regression for you?

Sami

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 01/13] objtool: Rewrite hashtable sizing
  2021-06-10 19:33       ` Peter Zijlstra
  2021-06-10 19:43         ` Sami Tolvanen
@ 2021-06-10 20:59         ` Nathan Chancellor
  2021-06-14 13:19         ` [tip: objtool/core] objtool: Improve reloc hash size guestimate tip-bot2 for Peter Zijlstra
  2 siblings, 0 replies; 48+ messages in thread
From: Nathan Chancellor @ 2021-06-10 20:59 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Sami Tolvanen, X86 ML, Josh Poimboeuf, Jason Baron,
	Steven Rostedt, Ard Biesheuvel, LKML, Nick Desaulniers,
	clang-built-linux

On Thu, Jun 10, 2021 at 09:33:44PM +0200, Peter Zijlstra wrote:
> On Thu, Jun 10, 2021 at 11:50:36AM -0700, Sami Tolvanen wrote:
> > On Thu, Jun 10, 2021 at 11:14 AM Nathan Chancellor <nathan@kernel.org> wrote:
> > > Adding Sami because I am not sure why this patch would have much of an impact
> > > in relation to LTO. https://git.kernel.org/tip/25cf0d8aa2a3 is the patch in
> > > question.
> > 
> > It's because LLVM enables -ffunction-sections with LTO, so using .text
> > section size to estimate the reloc hash table size isn't going to be
> > accurate, as confirmed by objtool output with --stats:
> > 
> >   OBJTOOL vmlinux.o
> > nr_sections: 141481
> > section_bits: 17
> > nr_symbols: 215262
> > symbol_bits: 17
> > max_reloc: 24850
> > tot_reloc: 590890
> > reloc_bits: 10
> 
> Bah. Would something like the *completely* untested below help with that?

LGTM, thanks for the quick fix!

Benchmark #1: allmodconfig
  Time (mean ± σ):     624.555 s ±  2.089 s    [User: 35109.967 s, System: 2146.215 s]
  Range (min … max):   623.078 s … 626.032 s    2 runs

Benchmark #2: allmodconfig with ThinLTO
  Time (mean ± σ):     769.959 s ±  1.819 s    [User: 39692.409 s, System: 2308.010 s]
  Range (min … max):   768.673 s … 771.245 s    2 runs

Summary
  'allmodconfig' ran
    1.23 ± 0.01 times faster than 'allmodconfig with ThinLTO'

Tested-by: Nathan Chancellor <nathan@kernel.org>

> ---
> diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
> index 25f6d293bc86..8676c7598728 100644
> --- a/tools/objtool/elf.c
> +++ b/tools/objtool/elf.c
> @@ -288,6 +288,9 @@ static int read_sections(struct elf *elf)
>  		}
>  		sec->len = sec->sh.sh_size;
>  
> +		if (sec->sh.sh_flags & SHF_EXECINSTR)
> +			elf->text_size += sec->len;
> +
>  		list_add_tail(&sec->list, &elf->sections);
>  		elf_hash_add(section, &sec->hash, sec->idx);
>  		elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
> @@ -581,13 +584,7 @@ static int read_relocs(struct elf *elf)
>  	unsigned int symndx;
>  	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
>  
> -	sec = find_section_by_name(elf, ".text");
> -	if (!sec) {
> -		WARN("no .text");
> -		return -1;
> -	}
> -
> -	if (!elf_alloc_hash(reloc, sec->len / 16))
> +	if (!elf_alloc_hash(reloc, elf->text_size / 16))
>  		return -1;
>  
>  	list_for_each_entry(sec, &elf->sections, list) {
> diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
> index 90082751f851..e34395047530 100644
> --- a/tools/objtool/include/objtool/elf.h
> +++ b/tools/objtool/include/objtool/elf.h
> @@ -83,6 +83,7 @@ struct elf {
>  	int fd;
>  	bool changed;
>  	char *name;
> +	unsigned int text_size;
>  	struct list_head sections;
>  
>  	int symbol_bits;

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [tip: objtool/core] objtool: Improve reloc hash size guestimate
  2021-06-10 19:33       ` Peter Zijlstra
  2021-06-10 19:43         ` Sami Tolvanen
  2021-06-10 20:59         ` Nathan Chancellor
@ 2021-06-14 13:19         ` tip-bot2 for Peter Zijlstra
  2 siblings, 0 replies; 48+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-06-14 13:19 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Nathan Chancellor, Peter Zijlstra (Intel), x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     d33b9035e14a35f6f2a5f067f0b156a93581811d
Gitweb:        https://git.kernel.org/tip/d33b9035e14a35f6f2a5f067f0b156a93581811d
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 11 Jun 2021 08:33:36 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 14 Jun 2021 14:05:36 +02:00

objtool: Improve reloc hash size guestimate

Nathan reported that LLVM ThinLTO builds have a performance regression
with commit 25cf0d8aa2a3 ("objtool: Rewrite hashtable sizing"). Sami
was quick to note that this is due to their use of -ffunction-sections.

As a result the .text section is small and basing the number of relocs
off of that no longer works. Instead have read_sections() compute the
sum of all SHF_EXECINSTR sections and use that.

Fixes: 25cf0d8aa2a3 ("objtool: Rewrite hashtable sizing")
Reported-by: Nathan Chancellor <nathan@kernel.org>
Debugged-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lkml.kernel.org/r/YMJpGLuGNsGtA5JJ@hirez.programming.kicks-ass.net
---
 tools/objtool/elf.c                 | 11 ++++-------
 tools/objtool/include/objtool/elf.h |  1 +
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index a8a0ee2..2371ccc 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -288,6 +288,9 @@ static int read_sections(struct elf *elf)
 		}
 		sec->len = sec->sh.sh_size;
 
+		if (sec->sh.sh_flags & SHF_EXECINSTR)
+			elf->text_size += sec->len;
+
 		list_add_tail(&sec->list, &elf->sections);
 		elf_hash_add(section, &sec->hash, sec->idx);
 		elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
@@ -581,13 +584,7 @@ static int read_relocs(struct elf *elf)
 	unsigned int symndx;
 	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
 
-	sec = find_section_by_name(elf, ".text");
-	if (!sec) {
-		WARN("no .text");
-		return -1;
-	}
-
-	if (!elf_alloc_hash(reloc, sec->len / 16))
+	if (!elf_alloc_hash(reloc, elf->text_size / 16))
 		return -1;
 
 	list_for_each_entry(sec, &elf->sections, list) {
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 9008275..e343950 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -83,6 +83,7 @@ struct elf {
 	int fd;
 	bool changed;
 	char *name;
+	unsigned int text_size;
 	struct list_head sections;
 
 	int symbol_bits;

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-05-19  6:56           ` Peter Zijlstra
@ 2021-06-29 20:00             ` Matthew Wilcox
  2021-06-29 20:35               ` Matthew Wilcox
  0 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox @ 2021-06-29 20:00 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Josh Poimboeuf, linux-kernel, linux-tip-commits, Ingo Molnar,
	x86, masahiroy, michal.lkml, Linus Torvalds


So this got merged without the corresponding Kbuild update being merged,
and my kernel failed to boot.  Bisect got as far as

$ git bisect good
Bisecting: 4 revisions left to test after this (roughly 2 steps)
[ab3257042c26d0cd44793c741e2f89bf38b21fe8] jump_label, x86: Allow short NOPs

before my sluggish memory remembered this thread from six weeks ago.

So if anybody else hits this, do a make clean.

On Wed, May 19, 2021 at 08:56:33AM +0200, Peter Zijlstra wrote:
> On Tue, May 18, 2021 at 07:44:11PM -0500, Josh Poimboeuf wrote:
> 
> > I'm not exactly thrilled that objtool now has the power to easily brick
> > a system :-/  Is it really worth it?
> 
> The way I look at it is that not running objtool is a bug either way,
> bricking a system is of course a somewhat more drastic failure mode than
> missing ORC info for example, but neither are good.
> 
> As to worth, about half the jump labels are shorter now, this reduces I$
> pressure on hot paths. Any little thing to offset the ever increasing
> bulk seems like a good thing to me. But yes, it would be nice if the
> assemblers wouldn't suck so bad and this wouldn't need objtool :/ But
> I've tried poking the tools guys and they don't really seem interested
> :-(
> 
> Also, only dirty builds are affected here; clean builds (always
> recommended afaik, because dep trouble isn't unheard of) are fine.
> 
> > Anyway, here's one way to fix it.  Maybe Masahiro has a better idea.
> 
> Thanks! lemme go read up on this magic :-)

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-06-29 20:00             ` Matthew Wilcox
@ 2021-06-29 20:35               ` Matthew Wilcox
  2021-06-30  7:07                 ` Peter Zijlstra
  0 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox @ 2021-06-29 20:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Josh Poimboeuf, linux-kernel, linux-tip-commits, Ingo Molnar,
	x86, masahiroy, michal.lkml, Linus Torvalds

On Tue, Jun 29, 2021 at 09:01:25PM +0100, Matthew Wilcox wrote:
> So this got merged without the corresponding Kbuild update being merged,
> and my kernel failed to boot.  Bisect got as far as
> 
> $ git bisect good
> Bisecting: 4 revisions left to test after this (roughly 2 steps)
> [ab3257042c26d0cd44793c741e2f89bf38b21fe8] jump_label, x86: Allow short NOPs
> 
> before my sluggish memory remembered this thread from six weeks ago.
> 
> So if anybody else hits this, do a make clean.

Actually, this is a different bug with the same symptom.

Applying the patch from Peter, and running it:

$ ./.build_test_kernel-x86_64/tools/objtool/objtool check -abdJsuld .build_test_kernel-x86_64/vmlinux.o
nr_sections: 15446
section_bits: 13
nr_symbols: 116448
symbol_bits: 16
max_reloc: 8031700
tot_reloc: 12477754
reloc_bits: 19
nr_insns: 2523443
.build_test_kernel-x86_64/vmlinux.o: warning: objtool: want_init_on_free()+0x0: jump-label unpatched

This is against a freshly built kernel -- I removed the build directory,
copied in a .config file and built a fresh kernel.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-06-29 20:35               ` Matthew Wilcox
@ 2021-06-30  7:07                 ` Peter Zijlstra
  2021-06-30  7:38                   ` Peter Zijlstra
  0 siblings, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2021-06-30  7:07 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Josh Poimboeuf, linux-kernel, linux-tip-commits, Ingo Molnar,
	x86, masahiroy, michal.lkml, Linus Torvalds

On Tue, Jun 29, 2021 at 09:35:37PM +0100, Matthew Wilcox wrote:
> On Tue, Jun 29, 2021 at 09:01:25PM +0100, Matthew Wilcox wrote:
> > So this got merged without the corresponding Kbuild update being merged,
> > and my kernel failed to boot.  Bisect got as far as
> > 
> > $ git bisect good
> > Bisecting: 4 revisions left to test after this (roughly 2 steps)
> > [ab3257042c26d0cd44793c741e2f89bf38b21fe8] jump_label, x86: Allow short NOPs
> > 
> > before my sluggish memory remembered this thread from six weeks ago.
> > 
> > So if anybody else hits this, do a make clean.
> 
> Actually, this is a different bug with the same symptom.
> 
> Applying the patch from Peter, and running it:
> 
> $ ./.build_test_kernel-x86_64/tools/objtool/objtool check -abdJsuld .build_test_kernel-x86_64/vmlinux.o
> nr_sections: 15446
> section_bits: 13
> nr_symbols: 116448
> symbol_bits: 16
> max_reloc: 8031700
> tot_reloc: 12477754
> reloc_bits: 19
> nr_insns: 2523443
> .build_test_kernel-x86_64/vmlinux.o: warning: objtool: want_init_on_free()+0x0: jump-label unpatched
> 
> This is against a freshly built kernel -- I removed the build directory,
> copied in a .config file and built a fresh kernel.

You happen to have said .config for me?

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [tip: objtool/core] jump_label, x86: Allow short NOPs
  2021-06-30  7:07                 ` Peter Zijlstra
@ 2021-06-30  7:38                   ` Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: Peter Zijlstra @ 2021-06-30  7:38 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Josh Poimboeuf, linux-kernel, linux-tip-commits, Ingo Molnar,
	x86, masahiroy, michal.lkml, Linus Torvalds

On Wed, Jun 30, 2021 at 09:07:05AM +0200, Peter Zijlstra wrote:
> On Tue, Jun 29, 2021 at 09:35:37PM +0100, Matthew Wilcox wrote:
> > On Tue, Jun 29, 2021 at 09:01:25PM +0100, Matthew Wilcox wrote:
> > > So this got merged without the corresponding Kbuild update being merged,
> > > and my kernel failed to boot.  Bisect got as far as
> > > 
> > > $ git bisect good
> > > Bisecting: 4 revisions left to test after this (roughly 2 steps)
> > > [ab3257042c26d0cd44793c741e2f89bf38b21fe8] jump_label, x86: Allow short NOPs
> > > 
> > > before my sluggish memory remembered this thread from six weeks ago.
> > > 
> > > So if anybody else hits this, do a make clean.
> > 
> > Actually, this is a different bug with the same symptom.
> > 
> > Applying the patch from Peter, and running it:
> > 
> > $ ./.build_test_kernel-x86_64/tools/objtool/objtool check -abdJsuld .build_test_kernel-x86_64/vmlinux.o
> > nr_sections: 15446
> > section_bits: 13
> > nr_symbols: 116448
> > symbol_bits: 16
> > max_reloc: 8031700
> > tot_reloc: 12477754
> > reloc_bits: 19
> > nr_insns: 2523443
> > .build_test_kernel-x86_64/vmlinux.o: warning: objtool: want_init_on_free()+0x0: jump-label unpatched
> > 
> > This is against a freshly built kernel -- I removed the build directory,
> > copied in a .config file and built a fresh kernel.
> 
> You happen to have said .config for me?

Also GCC version I suppose. The thing I'm wondering about in particular
is what translation unit is responsible for that symbol.

AFAICT the function itself is an inline from linux/mm.h, but I cannot
find any of the files or functions it's used in as being excluded from
objtool coverage :/

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2021-06-30  7:40 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-06 19:33 [PATCH 00/13] jump_label: Yet another attempt at variable sized jump_labels Peter Zijlstra
2021-05-06 19:33 ` [PATCH 01/13] objtool: Rewrite hashtable sizing Peter Zijlstra
2021-05-12 10:41   ` Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-06-10 18:14   ` [PATCH 01/13] " Nathan Chancellor
2021-06-10 18:43     ` Peter Zijlstra
2021-06-10 18:54       ` Nathan Chancellor
2021-06-10 18:50     ` Sami Tolvanen
2021-06-10 19:33       ` Peter Zijlstra
2021-06-10 19:43         ` Sami Tolvanen
2021-06-10 20:59         ` Nathan Chancellor
2021-06-14 13:19         ` [tip: objtool/core] objtool: Improve reloc hash size guestimate tip-bot2 for Peter Zijlstra
2021-05-06 19:33 ` [PATCH 02/13] x86,objtool: Dont exclude arch/x86/realmode/ Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] x86, objtool: " tip-bot2 for Peter Zijlstra
2021-05-06 19:33 ` [PATCH 03/13] jump_label, x86: Strip ASM jump_label support Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-06 19:33 ` [PATCH 04/13] jump_label, x86: Factor out the __jump_table generation Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-06 19:33 ` [PATCH 05/13] jump_label, x86: Improve error when we fail expected text Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-06 19:33 ` [PATCH 06/13] jump_label, x86: Introduce jump_entry_size() Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-06 19:33 ` [PATCH 07/13] jump_label, x86: Add variable length patching support Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-13 14:16   ` [PATCH 07.5/13] jump_label,x86: Remove unused JUMP_LABEL_NOP_SIZE Peter Zijlstra
2021-05-14  7:01     ` [tip: objtool/core] jump_label/x86: " tip-bot2 for Peter Zijlstra
2021-05-06 19:34 ` [PATCH 08/13] jump_label: Free jump_entry::key bit1 for build use Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-06 19:34 ` [PATCH 09/13] jump_label,x86: Emit short JMP Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] jump_label, x86: " tip-bot2 for Peter Zijlstra
2021-05-06 19:34 ` [PATCH 10/13] objtool: Decode jump_entry::key addend Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-06 19:34 ` [PATCH 11/13] objtool: Rewrite jump_label instructions Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-06 19:34 ` [PATCH 12/13] objtool: Provide stats for jump_labels Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2021-05-13 14:15   ` [PATCH 12.5/13] objtool: Reflow handle_jump_alt() Peter Zijlstra
2021-05-06 19:34 ` [PATCH 13/13] jump_label,x86: Allow short NOPs Peter Zijlstra
2021-05-06 19:49   ` Peter Zijlstra
2021-05-12 13:19   ` [tip: objtool/core] jump_label, x86: " tip-bot2 for Peter Zijlstra
2021-05-18 19:50     ` Peter Zijlstra
2021-05-18 20:24       ` Peter Zijlstra
2021-05-19  0:44         ` Josh Poimboeuf
2021-05-19  6:56           ` Peter Zijlstra
2021-06-29 20:00             ` Matthew Wilcox
2021-06-29 20:35               ` Matthew Wilcox
2021-06-30  7:07                 ` Peter Zijlstra
2021-06-30  7:38                   ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).