All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/10] objtool: Honey, I shrunk the instruction
@ 2023-02-08 17:17 Peter Zijlstra
  2023-02-08 17:17 ` [PATCH 01/10] objtool: Change arch_decode_instruction() signature Peter Zijlstra
                   ` (13 more replies)
  0 siblings, 14 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:17 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

Hi,

Boris complained he could no longer build allyesconfig on his 32G desktop
machine without having OOM terminate either objtool or chrome.

After talking about these patches on IRC, Nathan mentioned the linux-clang CI
was also having trouble of recent, and these patches appear to make it happy
again.

In total these patches shrink an allyesconfig run by about 6G:

pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem

Also at:

  https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=objtool/core


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 01/10] objtool: Change arch_decode_instruction() signature
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
@ 2023-02-08 17:17 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:17 ` [PATCH 02/10] objtool: Make instruction::stack_ops a single-linked list Peter Zijlstra
                   ` (12 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:17 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

In preparation to changing struct instruction around a bit, avoid
passing it's members by pointer and instead pass the whole thing.

A cleanup in it's own right too.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/arch/powerpc/decode.c  |   22 +++----
 tools/objtool/arch/x86/decode.c      |  105 +++++++++++++++++------------------
 tools/objtool/check.c                |    4 -
 tools/objtool/include/objtool/arch.h |    4 -
 4 files changed, 64 insertions(+), 71 deletions(-)

--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -41,38 +41,36 @@ const char *arch_ret_insn(int len)
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list)
+			    struct instruction *insn)
 {
 	unsigned int opcode;
 	enum insn_type typ;
 	unsigned long imm;
-	u32 insn;
+	u32 ins;
 
-	insn = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
-	opcode = insn >> 26;
+	ins = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
+	opcode = ins >> 26;
 	typ = INSN_OTHER;
 	imm = 0;
 
 	switch (opcode) {
 	case 18: /* b[l][a] */
-		if ((insn & 3) == 1) /* bl */
+		if ((ins & 3) == 1) /* bl */
 			typ = INSN_CALL;
 
-		imm = insn & 0x3fffffc;
+		imm = ins & 0x3fffffc;
 		if (imm & 0x2000000)
 			imm -= 0x4000000;
 		break;
 	}
 
 	if (opcode == 1)
-		*len = 8;
+		insn->len = 8;
 	else
-		*len = 4;
+		insn->len = 4;
 
-	*type = typ;
-	*immediate = imm;
+	insn->type = typ;
+	insn->immediate = imm;
 
 	return 0;
 }
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -146,12 +146,11 @@ static bool has_notrack_prefix(struct in
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list)
+			    struct instruction *insn)
 {
+	struct list_head *ops_list = &insn->stack_ops;
 	const struct elf *elf = file->elf;
-	struct insn insn;
+	struct insn ins;
 	int x86_64, ret;
 	unsigned char op1, op2, op3, prefix,
 		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
@@ -165,42 +164,42 @@ int arch_decode_instruction(struct objto
 	if (x86_64 == -1)
 		return -1;
 
-	ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen,
+	ret = insn_decode(&ins, sec->data->d_buf + offset, maxlen,
 			  x86_64 ? INSN_MODE_64 : INSN_MODE_32);
 	if (ret < 0) {
 		WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
 		return -1;
 	}
 
-	*len = insn.length;
-	*type = INSN_OTHER;
+	insn->len = ins.length;
+	insn->type = INSN_OTHER;
 
-	if (insn.vex_prefix.nbytes)
+	if (ins.vex_prefix.nbytes)
 		return 0;
 
-	prefix = insn.prefixes.bytes[0];
+	prefix = ins.prefixes.bytes[0];
 
-	op1 = insn.opcode.bytes[0];
-	op2 = insn.opcode.bytes[1];
-	op3 = insn.opcode.bytes[2];
+	op1 = ins.opcode.bytes[0];
+	op2 = ins.opcode.bytes[1];
+	op3 = ins.opcode.bytes[2];
 
-	if (insn.rex_prefix.nbytes) {
-		rex = insn.rex_prefix.bytes[0];
+	if (ins.rex_prefix.nbytes) {
+		rex = ins.rex_prefix.bytes[0];
 		rex_w = X86_REX_W(rex) >> 3;
 		rex_r = X86_REX_R(rex) >> 2;
 		rex_x = X86_REX_X(rex) >> 1;
 		rex_b = X86_REX_B(rex);
 	}
 
-	if (insn.modrm.nbytes) {
-		modrm = insn.modrm.bytes[0];
+	if (ins.modrm.nbytes) {
+		modrm = ins.modrm.bytes[0];
 		modrm_mod = X86_MODRM_MOD(modrm);
 		modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
 		modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
 	}
 
-	if (insn.sib.nbytes) {
-		sib = insn.sib.bytes[0];
+	if (ins.sib.nbytes) {
+		sib = ins.sib.bytes[0];
 		/* sib_scale = X86_SIB_SCALE(sib); */
 		sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
 		sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
@@ -254,7 +253,7 @@ int arch_decode_instruction(struct objto
 		break;
 
 	case 0x70 ... 0x7f:
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0x80 ... 0x83:
@@ -278,7 +277,7 @@ int arch_decode_instruction(struct objto
 		if (!rm_is_reg(CFI_SP))
 			break;
 
-		imm = insn.immediate.value;
+		imm = ins.immediate.value;
 		if (op1 & 2) { /* sign extend */
 			if (op1 & 1) { /* imm32 */
 				imm <<= 32;
@@ -309,7 +308,7 @@ int arch_decode_instruction(struct objto
 			ADD_OP(op) {
 				op->src.type = OP_SRC_AND;
 				op->src.reg = CFI_SP;
-				op->src.offset = insn.immediate.value;
+				op->src.offset = ins.immediate.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = CFI_SP;
 			}
@@ -356,7 +355,7 @@ int arch_decode_instruction(struct objto
 					op->src.reg = CFI_SP;
 					op->dest.type = OP_DEST_REG_INDIRECT;
 					op->dest.reg = modrm_rm;
-					op->dest.offset = insn.displacement.value;
+					op->dest.offset = ins.displacement.value;
 				}
 				break;
 			}
@@ -389,7 +388,7 @@ int arch_decode_instruction(struct objto
 				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG_INDIRECT;
 				op->dest.reg = CFI_BP;
-				op->dest.offset = insn.displacement.value;
+				op->dest.offset = ins.displacement.value;
 			}
 			break;
 		}
@@ -402,7 +401,7 @@ int arch_decode_instruction(struct objto
 				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG_INDIRECT;
 				op->dest.reg = CFI_SP;
-				op->dest.offset = insn.displacement.value;
+				op->dest.offset = ins.displacement.value;
 			}
 			break;
 		}
@@ -419,7 +418,7 @@ int arch_decode_instruction(struct objto
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_BP;
-				op->src.offset = insn.displacement.value;
+				op->src.offset = ins.displacement.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = modrm_reg;
 			}
@@ -432,7 +431,7 @@ int arch_decode_instruction(struct objto
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_SP;
-				op->src.offset = insn.displacement.value;
+				op->src.offset = ins.displacement.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = modrm_reg;
 			}
@@ -464,7 +463,7 @@ int arch_decode_instruction(struct objto
 
 		/* lea disp(%src), %dst */
 		ADD_OP(op) {
-			op->src.offset = insn.displacement.value;
+			op->src.offset = ins.displacement.value;
 			if (!op->src.offset) {
 				/* lea (%src), %dst */
 				op->src.type = OP_SRC_REG;
@@ -487,7 +486,7 @@ int arch_decode_instruction(struct objto
 		break;
 
 	case 0x90:
-		*type = INSN_NOP;
+		insn->type = INSN_NOP;
 		break;
 
 	case 0x9c:
@@ -511,39 +510,39 @@ int arch_decode_instruction(struct objto
 		if (op2 == 0x01) {
 
 			if (modrm == 0xca)
-				*type = INSN_CLAC;
+				insn->type = INSN_CLAC;
 			else if (modrm == 0xcb)
-				*type = INSN_STAC;
+				insn->type = INSN_STAC;
 
 		} else if (op2 >= 0x80 && op2 <= 0x8f) {
 
-			*type = INSN_JUMP_CONDITIONAL;
+			insn->type = INSN_JUMP_CONDITIONAL;
 
 		} else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
 			   op2 == 0x35) {
 
 			/* sysenter, sysret */
-			*type = INSN_CONTEXT_SWITCH;
+			insn->type = INSN_CONTEXT_SWITCH;
 
 		} else if (op2 == 0x0b || op2 == 0xb9) {
 
 			/* ud2 */
-			*type = INSN_BUG;
+			insn->type = INSN_BUG;
 
 		} else if (op2 == 0x0d || op2 == 0x1f) {
 
 			/* nopl/nopw */
-			*type = INSN_NOP;
+			insn->type = INSN_NOP;
 
 		} else if (op2 == 0x1e) {
 
 			if (prefix == 0xf3 && (modrm == 0xfa || modrm == 0xfb))
-				*type = INSN_ENDBR;
+				insn->type = INSN_ENDBR;
 
 
 		} else if (op2 == 0x38 && op3 == 0xf8) {
-			if (insn.prefixes.nbytes == 1 &&
-			    insn.prefixes.bytes[0] == 0xf2) {
+			if (ins.prefixes.nbytes == 1 &&
+			    ins.prefixes.bytes[0] == 0xf2) {
 				/* ENQCMD cannot be used in the kernel. */
 				WARN("ENQCMD instruction at %s:%lx", sec->name,
 				     offset);
@@ -591,29 +590,29 @@ int arch_decode_instruction(struct objto
 
 	case 0xcc:
 		/* int3 */
-		*type = INSN_TRAP;
+		insn->type = INSN_TRAP;
 		break;
 
 	case 0xe3:
 		/* jecxz/jrcxz */
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0xe9:
 	case 0xeb:
-		*type = INSN_JUMP_UNCONDITIONAL;
+		insn->type = INSN_JUMP_UNCONDITIONAL;
 		break;
 
 	case 0xc2:
 	case 0xc3:
-		*type = INSN_RETURN;
+		insn->type = INSN_RETURN;
 		break;
 
 	case 0xc7: /* mov imm, r/m */
 		if (!opts.noinstr)
 			break;
 
-		if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
+		if (ins.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
 			struct reloc *immr, *disp;
 			struct symbol *func;
 			int idx;
@@ -661,17 +660,17 @@ int arch_decode_instruction(struct objto
 
 	case 0xca: /* retf */
 	case 0xcb: /* retf */
-		*type = INSN_CONTEXT_SWITCH;
+		insn->type = INSN_CONTEXT_SWITCH;
 		break;
 
 	case 0xe0: /* loopne */
 	case 0xe1: /* loope */
 	case 0xe2: /* loop */
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0xe8:
-		*type = INSN_CALL;
+		insn->type = INSN_CALL;
 		/*
 		 * For the impact on the stack, a CALL behaves like
 		 * a PUSH of an immediate value (the return address).
@@ -683,30 +682,30 @@ int arch_decode_instruction(struct objto
 		break;
 
 	case 0xfc:
-		*type = INSN_CLD;
+		insn->type = INSN_CLD;
 		break;
 
 	case 0xfd:
-		*type = INSN_STD;
+		insn->type = INSN_STD;
 		break;
 
 	case 0xff:
 		if (modrm_reg == 2 || modrm_reg == 3) {
 
-			*type = INSN_CALL_DYNAMIC;
-			if (has_notrack_prefix(&insn))
+			insn->type = INSN_CALL_DYNAMIC;
+			if (has_notrack_prefix(&ins))
 				WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
 
 		} else if (modrm_reg == 4) {
 
-			*type = INSN_JUMP_DYNAMIC;
-			if (has_notrack_prefix(&insn))
+			insn->type = INSN_JUMP_DYNAMIC;
+			if (has_notrack_prefix(&ins))
 				WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
 
 		} else if (modrm_reg == 5) {
 
 			/* jmpf */
-			*type = INSN_CONTEXT_SWITCH;
+			insn->type = INSN_CONTEXT_SWITCH;
 
 		} else if (modrm_reg == 6) {
 
@@ -723,7 +722,7 @@ int arch_decode_instruction(struct objto
 		break;
 	}
 
-	*immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
+	insn->immediate = ins.immediate.nbytes ? ins.immediate.value : 0;
 
 	return 0;
 }
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -404,9 +404,7 @@ static int decode_instructions(struct ob
 
 			ret = arch_decode_instruction(file, sec, offset,
 						      sec->sh.sh_size - offset,
-						      &insn->len, &insn->type,
-						      &insn->immediate,
-						      &insn->stack_ops);
+						      insn);
 			if (ret)
 				goto err;
 
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -75,9 +75,7 @@ void arch_initial_func_cfi_state(struct
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list);
+			    struct instruction *insn);
 
 bool arch_callee_saved_reg(unsigned char reg);
 



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 02/10] objtool: Make instruction::stack_ops a single-linked list
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
  2023-02-08 17:17 ` [PATCH 01/10] objtool: Change arch_decode_instruction() signature Peter Zijlstra
@ 2023-02-08 17:17 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:17 ` [PATCH 03/10] objtool: Make instruction::alts " Peter Zijlstra
                   ` (11 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:17 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	unsigned int               len;                  /*    64     4 */
 	enum insn_type             type;                 /*    68     4 */
 	long unsigned int          immediate;            /*    72     8 */
 	u16                        dead_end:1;           /*    80: 0  2 */
 	u16                        ignore:1;             /*    80: 1  2 */
 	u16                        ignore_alts:1;        /*    80: 2  2 */
 	u16                        hint:1;               /*    80: 3  2 */
 	u16                        save:1;               /*    80: 4  2 */
 	u16                        restore:1;            /*    80: 5  2 */
 	u16                        retpoline_safe:1;     /*    80: 6  2 */
 	u16                        noendbr:1;            /*    80: 7  2 */
 	u16                        entry:1;              /*    80: 8  2 */

 	/* XXX 7 bits hole, try to pack */

 	s8                         instr;                /*    82     1 */
 	u8                         visited;              /*    83     1 */

 	/* XXX 4 bytes hole, try to pack */

 	struct alt_group *         alt_group;            /*    88     8 */
 	struct symbol *            call_dest;            /*    96     8 */
 	struct instruction *       jump_dest;            /*   104     8 */
 	struct instruction *       first_jump_src;       /*   112     8 */
 	struct reloc *             jump_table;           /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
 	struct reloc *             reloc;                /*   128     8 */
 	struct list_head           alts;                 /*   136    16 */
 	struct symbol *            sym;                  /*   152     8 */
-	struct list_head           stack_ops;            /*   160    16 */
-	struct cfi_state *         cfi;                  /*   176     8 */
+	struct stack_op *          stack_ops;            /*   160     8 */
+	struct cfi_state *         cfi;                  /*   168     8 */

-	/* size: 184, cachelines: 3, members: 29 */
-	/* sum members: 178, holes: 1, sum holes: 4 */
+	/* size: 176, cachelines: 3, members: 29 */
+	/* sum members: 170, holes: 1, sum holes: 4 */
 	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 56 bytes */
+	/* last cacheline: 48 bytes */
 };

pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
post:	5:58.50 real,   229.64 user,    128.65 sys,     26221520 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/arch/x86/decode.c       |    4 ++--
 tools/objtool/check.c                 |   11 +++++------
 tools/objtool/include/objtool/arch.h  |    2 +-
 tools/objtool/include/objtool/check.h |    2 +-
 4 files changed, 9 insertions(+), 10 deletions(-)

--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -105,7 +105,7 @@ bool arch_pc_relative_reloc(struct reloc
 #define ADD_OP(op) \
 	if (!(op = calloc(1, sizeof(*op)))) \
 		return -1; \
-	else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+	else for (*ops_list = op, ops_list = &op->next; op; op = NULL)
 
 /*
  * Helpers to decode ModRM/SIB:
@@ -148,7 +148,7 @@ int arch_decode_instruction(struct objto
 			    unsigned long offset, unsigned int maxlen,
 			    struct instruction *insn)
 {
-	struct list_head *ops_list = &insn->stack_ops;
+	struct stack_op **ops_list = &insn->stack_ops;
 	const struct elf *elf = file->elf;
 	struct insn ins;
 	int x86_64, ret;
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -396,7 +396,6 @@ static int decode_instructions(struct ob
 			}
 			memset(insn, 0, sizeof(*insn));
 			INIT_LIST_HEAD(&insn->alts);
-			INIT_LIST_HEAD(&insn->stack_ops);
 			INIT_LIST_HEAD(&insn->call_node);
 
 			insn->sec = sec;
@@ -1319,12 +1318,13 @@ static struct reloc *insn_reloc(struct o
 
 static void remove_insn_ops(struct instruction *insn)
 {
-	struct stack_op *op, *tmp;
+	struct stack_op *op, *next;
 
-	list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
-		list_del(&op->list);
+	for (op = insn->stack_ops; op; op = next) {
+		next = op->next;
 		free(op);
 	}
+	insn->stack_ops = NULL;
 }
 
 static void annotate_call_site(struct objtool_file *file,
@@ -1769,7 +1769,6 @@ static int handle_group_alt(struct objto
 		}
 		memset(nop, 0, sizeof(*nop));
 		INIT_LIST_HEAD(&nop->alts);
-		INIT_LIST_HEAD(&nop->stack_ops);
 
 		nop->sec = special_alt->new_sec;
 		nop->offset = special_alt->new_off + special_alt->new_len;
@@ -3214,7 +3213,7 @@ static int handle_insn_ops(struct instru
 {
 	struct stack_op *op;
 
-	list_for_each_entry(op, &insn->stack_ops, list) {
+	for (op = insn->stack_ops; op; op = op->next) {
 
 		if (update_cfi_state(insn, next_insn, &state->cfi, op))
 			return 1;
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -62,9 +62,9 @@ struct op_src {
 };
 
 struct stack_op {
+	struct stack_op *next;
 	struct op_dest dest;
 	struct op_src src;
-	struct list_head list;
 };
 
 struct instruction;
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -68,7 +68,7 @@ struct instruction {
 	struct reloc *reloc;
 	struct list_head alts;
 	struct symbol *sym;
-	struct list_head stack_ops;
+	struct stack_op *stack_ops;
 	struct cfi_state *cfi;
 };
 



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 03/10] objtool: Make instruction::alts a single-linked list
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
  2023-02-08 17:17 ` [PATCH 01/10] objtool: Change arch_decode_instruction() signature Peter Zijlstra
  2023-02-08 17:17 ` [PATCH 02/10] objtool: Make instruction::stack_ops a single-linked list Peter Zijlstra
@ 2023-02-08 17:17 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:18 ` [PATCH 04/10] objtool: Shrink instruction::{type,visited} Peter Zijlstra
                   ` (10 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:17 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	unsigned int               len;                  /*    64     4 */
 	enum insn_type             type;                 /*    68     4 */
 	long unsigned int          immediate;            /*    72     8 */
 	u16                        dead_end:1;           /*    80: 0  2 */
 	u16                        ignore:1;             /*    80: 1  2 */
 	u16                        ignore_alts:1;        /*    80: 2  2 */
 	u16                        hint:1;               /*    80: 3  2 */
 	u16                        save:1;               /*    80: 4  2 */
 	u16                        restore:1;            /*    80: 5  2 */
 	u16                        retpoline_safe:1;     /*    80: 6  2 */
 	u16                        noendbr:1;            /*    80: 7  2 */
 	u16                        entry:1;              /*    80: 8  2 */

 	/* XXX 7 bits hole, try to pack */

 	s8                         instr;                /*    82     1 */
 	u8                         visited;              /*    83     1 */

 	/* XXX 4 bytes hole, try to pack */

 	struct alt_group *         alt_group;            /*    88     8 */
 	struct symbol *            call_dest;            /*    96     8 */
 	struct instruction *       jump_dest;            /*   104     8 */
 	struct instruction *       first_jump_src;       /*   112     8 */
 	struct reloc *             jump_table;           /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
 	struct reloc *             reloc;                /*   128     8 */
-	struct list_head           alts;                 /*   136    16 */
-	struct symbol *            sym;                  /*   152     8 */
-	struct stack_op *          stack_ops;            /*   160     8 */
-	struct cfi_state *         cfi;                  /*   168     8 */
+	struct alternative *       alts;                 /*   136     8 */
+	struct symbol *            sym;                  /*   144     8 */
+	struct stack_op *          stack_ops;            /*   152     8 */
+	struct cfi_state *         cfi;                  /*   160     8 */

-	/* size: 176, cachelines: 3, members: 29 */
-	/* sum members: 170, holes: 1, sum holes: 4 */
+	/* size: 168, cachelines: 3, members: 29 */
+	/* sum members: 162, holes: 1, sum holes: 4 */
 	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 48 bytes */
+	/* last cacheline: 40 bytes */
 };

pre:	5:58.50 real,   229.64 user,    128.65 sys,     26221520 mem
post:	5:48.86 real,   220.30 user,    128.34 sys,     24834672 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c                 |   18 +++++++++---------
 tools/objtool/include/objtool/check.h |    2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -23,7 +23,7 @@
 #include <linux/static_call_types.h>
 
 struct alternative {
-	struct list_head list;
+	struct alternative *next;
 	struct instruction *insn;
 	bool skip_orig;
 };
@@ -395,7 +395,6 @@ static int decode_instructions(struct ob
 				return -1;
 			}
 			memset(insn, 0, sizeof(*insn));
-			INIT_LIST_HEAD(&insn->alts);
 			INIT_LIST_HEAD(&insn->call_node);
 
 			insn->sec = sec;
@@ -1768,7 +1767,6 @@ static int handle_group_alt(struct objto
 			return -1;
 		}
 		memset(nop, 0, sizeof(*nop));
-		INIT_LIST_HEAD(&nop->alts);
 
 		nop->sec = special_alt->new_sec;
 		nop->offset = special_alt->new_off + special_alt->new_len;
@@ -1966,7 +1964,8 @@ static int add_special_section_alts(stru
 		alt->insn = new_insn;
 		alt->skip_orig = special_alt->skip_orig;
 		orig_insn->ignore_alts |= special_alt->skip_alt;
-		list_add_tail(&alt->list, &orig_insn->alts);
+		alt->next = orig_insn->alts;
+		orig_insn->alts = alt;
 
 		list_del(&special_alt->list);
 		free(special_alt);
@@ -2025,7 +2024,8 @@ static int add_jump_table(struct objtool
 		}
 
 		alt->insn = dest_insn;
-		list_add_tail(&alt->list, &insn->alts);
+		alt->next = insn->alts;
+		insn->alts = alt;
 		prev_offset = reloc->offset;
 	}
 
@@ -3576,10 +3576,10 @@ static int validate_branch(struct objtoo
 		if (propagate_alt_cfi(file, insn))
 			return 1;
 
-		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+		if (!insn->ignore_alts && insn->alts) {
 			bool skip_orig = false;
 
-			list_for_each_entry(alt, &insn->alts, list) {
+			for (alt = insn->alts; alt; alt = alt->next) {
 				if (alt->skip_orig)
 					skip_orig = true;
 
@@ -3778,11 +3778,11 @@ static int validate_entry(struct objtool
 
 		insn->visited |= VISITED_ENTRY;
 
-		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+		if (!insn->ignore_alts && insn->alts) {
 			struct alternative *alt;
 			bool skip_orig = false;
 
-			list_for_each_entry(alt, &insn->alts, list) {
+			for (alt = insn->alts; alt; alt = alt->next) {
 				if (alt->skip_orig)
 					skip_orig = true;
 
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -66,7 +66,7 @@ struct instruction {
 	struct instruction *first_jump_src;
 	struct reloc *jump_table;
 	struct reloc *reloc;
-	struct list_head alts;
+	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;
 	struct cfi_state *cfi;



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 04/10] objtool: Shrink instruction::{type,visited}
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (2 preceding siblings ...)
  2023-02-08 17:17 ` [PATCH 03/10] objtool: Make instruction::alts " Peter Zijlstra
@ 2023-02-08 17:18 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:18 ` [PATCH 05/10] objtool: Remove instruction::reloc Peter Zijlstra
                   ` (9 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:18 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

Since we don't have that many types in enum insn_type, force it into a
u8 and re-arrange member to get rid of the holes, saves another 8
bytes.

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
-	unsigned int               len;                  /*    64     4 */
-	enum insn_type             type;                 /*    68     4 */
-	long unsigned int          immediate;            /*    72     8 */
-	u16                        dead_end:1;           /*    80: 0  2 */
-	u16                        ignore:1;             /*    80: 1  2 */
-	u16                        ignore_alts:1;        /*    80: 2  2 */
-	u16                        hint:1;               /*    80: 3  2 */
-	u16                        save:1;               /*    80: 4  2 */
-	u16                        restore:1;            /*    80: 5  2 */
-	u16                        retpoline_safe:1;     /*    80: 6  2 */
-	u16                        noendbr:1;            /*    80: 7  2 */
-	u16                        entry:1;              /*    80: 8  2 */
+	long unsigned int          immediate;            /*    64     8 */
+	unsigned int               len;                  /*    72     4 */
+	u8                         type;                 /*    76     1 */

-	/* XXX 7 bits hole, try to pack */
+	/* Bitfield combined with previous fields */

-	s8                         instr;                /*    82     1 */
-	u8                         visited;              /*    83     1 */
+	u16                        dead_end:1;           /*    76: 8  2 */
+	u16                        ignore:1;             /*    76: 9  2 */
+	u16                        ignore_alts:1;        /*    76:10  2 */
+	u16                        hint:1;               /*    76:11  2 */
+	u16                        save:1;               /*    76:12  2 */
+	u16                        restore:1;            /*    76:13  2 */
+	u16                        retpoline_safe:1;     /*    76:14  2 */
+	u16                        noendbr:1;            /*    76:15  2 */
+	u16                        entry:1;              /*    78: 0  2 */
+	u16                        visited:4;            /*    78: 1  2 */

-	/* XXX 4 bytes hole, try to pack */
+	/* XXX 3 bits hole, try to pack */
+	/* Bitfield combined with next fields */

-	struct alt_group *         alt_group;            /*    88     8 */
-	struct symbol *            call_dest;            /*    96     8 */
-	struct instruction *       jump_dest;            /*   104     8 */
-	struct instruction *       first_jump_src;       /*   112     8 */
-	struct reloc *             jump_table;           /*   120     8 */
+	s8                         instr;                /*    79     1 */
+	struct alt_group *         alt_group;            /*    80     8 */
+	struct symbol *            call_dest;            /*    88     8 */
+	struct instruction *       jump_dest;            /*    96     8 */
+	struct instruction *       first_jump_src;       /*   104     8 */
+	struct reloc *             jump_table;           /*   112     8 */
+	struct reloc *             reloc;                /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct reloc *             reloc;                /*   128     8 */
-	struct alternative *       alts;                 /*   136     8 */
-	struct symbol *            sym;                  /*   144     8 */
-	struct stack_op *          stack_ops;            /*   152     8 */
-	struct cfi_state *         cfi;                  /*   160     8 */
+	struct alternative *       alts;                 /*   128     8 */
+	struct symbol *            sym;                  /*   136     8 */
+	struct stack_op *          stack_ops;            /*   144     8 */
+	struct cfi_state *         cfi;                  /*   152     8 */

-	/* size: 168, cachelines: 3, members: 29 */
-	/* sum members: 162, holes: 1, sum holes: 4 */
-	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 40 bytes */
+	/* size: 160, cachelines: 3, members: 29 */
+	/* sum members: 158 */
+	/* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
+	/* last cacheline: 32 bytes */
 };

pre:	5:48.86 real,   220.30 user,    128.34 sys,     24834672 mem
post:	5:48.89 real,   220.96 user,    127.55 sys,     24834672 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/include/objtool/check.h |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -42,9 +42,9 @@ struct instruction {
 	struct list_head call_node;
 	struct section *sec;
 	unsigned long offset;
-	unsigned int len;
-	enum insn_type type;
 	unsigned long immediate;
+	unsigned int len;
+	u8 type;
 
 	u16 dead_end		: 1,
 	   ignore		: 1,
@@ -54,11 +54,11 @@ struct instruction {
 	   restore		: 1,
 	   retpoline_safe	: 1,
 	   noendbr		: 1,
-	   entry		: 1;
-		/* 7 bit hole */
+	   entry		: 1,
+	   visited		: 4;
+		/* 3 bit hole */
 
 	s8 instr;
-	u8 visited;
 
 	struct alt_group *alt_group;
 	struct symbol *call_dest;



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 05/10] objtool: Remove instruction::reloc
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (3 preceding siblings ...)
  2023-02-08 17:18 ` [PATCH 04/10] objtool: Shrink instruction::{type,visited} Peter Zijlstra
@ 2023-02-08 17:18 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:18 ` [PATCH 06/10] objtool: Union instruction::{call_dest,jump_table} Peter Zijlstra
                   ` (8 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:18 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

Instead of caching the reloc for each instruction, only keep a
negative cache of not having a reloc (by far the most common case).

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	long unsigned int          immediate;            /*    64     8 */
 	unsigned int               len;                  /*    72     4 */
 	u8                         type;                 /*    76     1 */

 	/* Bitfield combined with previous fields */

 	u16                        dead_end:1;           /*    76: 8  2 */
 	u16                        ignore:1;             /*    76: 9  2 */
 	u16                        ignore_alts:1;        /*    76:10  2 */
 	u16                        hint:1;               /*    76:11  2 */
 	u16                        save:1;               /*    76:12  2 */
 	u16                        restore:1;            /*    76:13  2 */
 	u16                        retpoline_safe:1;     /*    76:14  2 */
 	u16                        noendbr:1;            /*    76:15  2 */
 	u16                        entry:1;              /*    78: 0  2 */
 	u16                        visited:4;            /*    78: 1  2 */
+	u16                        no_reloc:1;           /*    78: 5  2 */

-	/* XXX 3 bits hole, try to pack */
+	/* XXX 2 bits hole, try to pack */
 	/* Bitfield combined with next fields */

 	s8                         instr;                /*    79     1 */
 	struct alt_group *         alt_group;            /*    80     8 */
 	struct symbol *            call_dest;            /*    88     8 */
 	struct instruction *       jump_dest;            /*    96     8 */
 	struct instruction *       first_jump_src;       /*   104     8 */
 	struct reloc *             jump_table;           /*   112     8 */
-	struct reloc *             reloc;                /*   120     8 */
+	struct alternative *       alts;                 /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct alternative *       alts;                 /*   128     8 */
-	struct symbol *            sym;                  /*   136     8 */
-	struct stack_op *          stack_ops;            /*   144     8 */
-	struct cfi_state *         cfi;                  /*   152     8 */
+	struct symbol *            sym;                  /*   128     8 */
+	struct stack_op *          stack_ops;            /*   136     8 */
+	struct cfi_state *         cfi;                  /*   144     8 */

-	/* size: 160, cachelines: 3, members: 29 */
-	/* sum members: 158 */
-	/* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
-	/* last cacheline: 32 bytes */
+	/* size: 152, cachelines: 3, members: 29 */
+	/* sum members: 150 */
+	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
+	/* last cacheline: 24 bytes */
 };

pre:	5:48.89 real,   220.96 user,    127.55 sys,     24834672 mem
post:	5:39.35 real,   215.58 user,    123.69 sys,     23448736 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c                 |   26 ++++++++++++--------------
 tools/objtool/include/objtool/check.h |    6 +++---
 2 files changed, 15 insertions(+), 17 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1307,26 +1307,24 @@ __weak bool arch_is_rethunk(struct symbo
 	return false;
 }
 
-#define NEGATIVE_RELOC	((void *)-1L)
-
 static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
 {
-	if (insn->reloc == NEGATIVE_RELOC)
+	struct reloc *reloc;
+
+	if (insn->no_reloc)
 		return NULL;
 
-	if (!insn->reloc) {
-		if (!file)
-			return NULL;
-
-		insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-						       insn->offset, insn->len);
-		if (!insn->reloc) {
-			insn->reloc = NEGATIVE_RELOC;
-			return NULL;
-		}
+	if (!file)
+		return NULL;
+
+	reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+					 insn->offset, insn->len);
+	if (!reloc) {
+		insn->no_reloc = 1;
+		return NULL;
 	}
 
-	return insn->reloc;
+	return reloc;
 }
 
 static void remove_insn_ops(struct instruction *insn)
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -55,8 +55,9 @@ struct instruction {
 	   retpoline_safe	: 1,
 	   noendbr		: 1,
 	   entry		: 1,
-	   visited		: 4;
-		/* 3 bit hole */
+	   visited		: 4,
+	   no_reloc		: 1;
+		/* 2 bit hole */
 
 	s8 instr;
 
@@ -65,7 +66,6 @@ struct instruction {
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
 	struct reloc *jump_table;
-	struct reloc *reloc;
 	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 06/10] objtool: Union instruction::{call_dest,jump_table}
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (4 preceding siblings ...)
  2023-02-08 17:18 ` [PATCH 05/10] objtool: Remove instruction::reloc Peter Zijlstra
@ 2023-02-08 17:18 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:18 ` [PATCH 07/10] objtool: Fix overlapping alternatives Peter Zijlstra
                   ` (7 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:18 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	long unsigned int          immediate;            /*    64     8 */
 	unsigned int               len;                  /*    72     4 */
 	u8                         type;                 /*    76     1 */

 	/* Bitfield combined with previous fields */

 	u16                        dead_end:1;           /*    76: 8  2 */
 	u16                        ignore:1;             /*    76: 9  2 */
 	u16                        ignore_alts:1;        /*    76:10  2 */
 	u16                        hint:1;               /*    76:11  2 */
 	u16                        save:1;               /*    76:12  2 */
 	u16                        restore:1;            /*    76:13  2 */
 	u16                        retpoline_safe:1;     /*    76:14  2 */
 	u16                        noendbr:1;            /*    76:15  2 */
 	u16                        entry:1;              /*    78: 0  2 */
 	u16                        visited:4;            /*    78: 1  2 */
 	u16                        no_reloc:1;           /*    78: 5  2 */

 	/* XXX 2 bits hole, try to pack */
 	/* Bitfield combined with next fields */

 	s8                         instr;                /*    79     1 */
 	struct alt_group *         alt_group;            /*    80     8 */
-	struct symbol *            call_dest;            /*    88     8 */
-	struct instruction *       jump_dest;            /*    96     8 */
-	struct instruction *       first_jump_src;       /*   104     8 */
-	struct reloc *             jump_table;           /*   112     8 */
-	struct alternative *       alts;                 /*   120     8 */
+	struct instruction *       jump_dest;            /*    88     8 */
+	struct instruction *       first_jump_src;       /*    96     8 */
+	union {
+		struct symbol *    _call_dest;           /*   104     8 */
+		struct reloc *     _jump_table;          /*   104     8 */
+	};                                               /*   104     8 */
+	struct alternative *       alts;                 /*   112     8 */
+	struct symbol *            sym;                  /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct symbol *            sym;                  /*   128     8 */
-	struct stack_op *          stack_ops;            /*   136     8 */
-	struct cfi_state *         cfi;                  /*   144     8 */
+	struct stack_op *          stack_ops;            /*   128     8 */
+	struct cfi_state *         cfi;                  /*   136     8 */

-	/* size: 152, cachelines: 3, members: 29 */
-	/* sum members: 150 */
+	/* size: 144, cachelines: 3, members: 28 */
+	/* sum members: 142 */
 	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
-	/* last cacheline: 24 bytes */
+	/* last cacheline: 16 bytes */
 };

pre:	5:39.35 real,   215.58 user,    123.69 sys,     23448736 mem
post:	5:38.18 real,   213.25 user,    124.90 sys,     23449040 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c                 |   73 +++++++++++++++++++++-------------
 tools/objtool/include/objtool/check.h |    6 +-
 2 files changed, 50 insertions(+), 29 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -114,16 +114,34 @@ static struct instruction *prev_insn_sam
 	for (insn = next_insn_same_sec(file, insn); insn;		\
 	     insn = next_insn_same_sec(file, insn))
 
+static inline struct symbol *insn_call_dest(struct instruction *insn)
+{
+	if (insn->type == INSN_JUMP_DYNAMIC ||
+	    insn->type == INSN_CALL_DYNAMIC)
+		return NULL;
+
+	return insn->_call_dest;
+}
+
+static inline struct reloc *insn_jump_table(struct instruction *insn)
+{
+	if (insn->type == INSN_JUMP_DYNAMIC ||
+	    insn->type == INSN_CALL_DYNAMIC)
+		return insn->_jump_table;
+
+	return NULL;
+}
+
 static bool is_jump_table_jump(struct instruction *insn)
 {
 	struct alt_group *alt_group = insn->alt_group;
 
-	if (insn->jump_table)
+	if (insn_jump_table(insn))
 		return true;
 
 	/* Retpoline alternative for a jump table? */
 	return alt_group && alt_group->orig_group &&
-	       alt_group->orig_group->first_insn->jump_table;
+	       insn_jump_table(alt_group->orig_group->first_insn);
 }
 
 static bool is_sibling_call(struct instruction *insn)
@@ -137,8 +155,8 @@ static bool is_sibling_call(struct instr
 			return !is_jump_table_jump(insn);
 	}
 
-	/* add_jump_destinations() sets insn->call_dest for sibling calls. */
-	return (is_static_jump(insn) && insn->call_dest);
+	/* add_jump_destinations() sets insn_call_dest(insn) for sibling calls. */
+	return (is_static_jump(insn) && insn_call_dest(insn));
 }
 
 /*
@@ -273,8 +291,8 @@ static void init_insn_state(struct objto
 
 	/*
 	 * We need the full vmlinux for noinstr validation, otherwise we can
-	 * not correctly determine insn->call_dest->sec (external symbols do
-	 * not have a section).
+	 * not correctly determine insn_call_dest(insn)->sec (external symbols
+	 * do not have a section).
 	 */
 	if (opts.link && opts.noinstr && sec)
 		state->noinstr = sec->noinstr;
@@ -677,7 +695,7 @@ static int create_static_call_sections(s
 			return -1;
 
 		/* find key symbol */
-		key_name = strdup(insn->call_dest->name);
+		key_name = strdup(insn_call_dest(insn)->name);
 		if (!key_name) {
 			perror("strdup");
 			return -1;
@@ -708,7 +726,7 @@ static int create_static_call_sections(s
 			 * trampoline address.  This is fixed up in
 			 * static_call_add_module().
 			 */
-			key_sym = insn->call_dest;
+			key_sym = insn_call_dest(insn);
 		}
 		free(key_name);
 
@@ -1342,7 +1360,7 @@ static void annotate_call_site(struct ob
 			       struct instruction *insn, bool sibling)
 {
 	struct reloc *reloc = insn_reloc(file, insn);
-	struct symbol *sym = insn->call_dest;
+	struct symbol *sym = insn_call_dest(insn);
 
 	if (!sym)
 		sym = reloc->sym;
@@ -1427,7 +1445,7 @@ static void annotate_call_site(struct ob
 static void add_call_dest(struct objtool_file *file, struct instruction *insn,
 			  struct symbol *dest, bool sibling)
 {
-	insn->call_dest = dest;
+	insn->_call_dest = dest;
 	if (!dest)
 		return;
 
@@ -1685,12 +1703,12 @@ static int add_call_destinations(struct
 			if (insn->ignore)
 				continue;
 
-			if (!insn->call_dest) {
+			if (!insn_call_dest(insn)) {
 				WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset);
 				return -1;
 			}
 
-			if (insn_func(insn) && insn->call_dest->type != STT_FUNC) {
+			if (insn_func(insn) && insn_call_dest(insn)->type != STT_FUNC) {
 				WARN_FUNC("unsupported call to non-function",
 					  insn->sec, insn->offset);
 				return -1;
@@ -2127,7 +2145,7 @@ static void mark_func_jump_tables(struct
 		reloc = find_jump_table(file, func, insn);
 		if (reloc) {
 			reloc->jump_table_start = true;
-			insn->jump_table = reloc;
+			insn->_jump_table = reloc;
 		}
 	}
 }
@@ -2139,10 +2157,10 @@ static int add_func_jump_tables(struct o
 	int ret;
 
 	func_for_each_insn(file, func, insn) {
-		if (!insn->jump_table)
+		if (!insn_jump_table(insn))
 			continue;
 
-		ret = add_jump_table(file, insn, insn->jump_table);
+		ret = add_jump_table(file, insn, insn_jump_table(insn));
 		if (ret)
 			return ret;
 	}
@@ -2614,8 +2632,8 @@ static int decode_sections(struct objtoo
 static bool is_fentry_call(struct instruction *insn)
 {
 	if (insn->type == INSN_CALL &&
-	    insn->call_dest &&
-	    insn->call_dest->fentry)
+	    insn_call_dest(insn) &&
+	    insn_call_dest(insn)->fentry)
 		return true;
 
 	return false;
@@ -3322,8 +3340,8 @@ static inline const char *call_dest_name
 	struct reloc *rel;
 	int idx;
 
-	if (insn->call_dest)
-		return insn->call_dest->name;
+	if (insn_call_dest(insn))
+		return insn_call_dest(insn)->name;
 
 	rel = insn_reloc(NULL, insn);
 	if (rel && !strcmp(rel->sym->name, "pv_ops")) {
@@ -3405,13 +3423,13 @@ static int validate_call(struct objtool_
 			 struct insn_state *state)
 {
 	if (state->noinstr && state->instr <= 0 &&
-	    !noinstr_call_dest(file, insn, insn->call_dest)) {
+	    !noinstr_call_dest(file, insn, insn_call_dest(insn))) {
 		WARN_FUNC("call to %s() leaves .noinstr.text section",
 				insn->sec, insn->offset, call_dest_name(insn));
 		return 1;
 	}
 
-	if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
+	if (state->uaccess && !func_uaccess_safe(insn_call_dest(insn))) {
 		WARN_FUNC("call to %s() with UACCESS enabled",
 				insn->sec, insn->offset, call_dest_name(insn));
 		return 1;
@@ -3849,11 +3867,11 @@ static int validate_entry(struct objtool
 
 			/* fallthrough */
 		case INSN_CALL:
-			dest = find_insn(file, insn->call_dest->sec,
-					 insn->call_dest->offset);
+			dest = find_insn(file, insn_call_dest(insn)->sec,
+					 insn_call_dest(insn)->offset);
 			if (!dest) {
 				WARN("Unresolved function after linking!?: %s",
-				     insn->call_dest->name);
+				     insn_call_dest(insn)->name);
 				return -1;
 			}
 
@@ -3954,13 +3972,13 @@ static int validate_retpoline(struct obj
 static bool is_kasan_insn(struct instruction *insn)
 {
 	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+		!strcmp(insn_call_dest(insn)->name, "__asan_handle_no_return"));
 }
 
 static bool is_ubsan_insn(struct instruction *insn)
 {
 	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name,
+		!strcmp(insn_call_dest(insn)->name,
 			"__ubsan_handle_builtin_unreachable"));
 }
 
@@ -4038,7 +4056,8 @@ static bool ignore_unreachable_insn(stru
 	 * It may also insert a UD2 after calling a __noreturn function.
 	 */
 	prev_insn = list_prev_entry(insn, list);
-	if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) &&
+	if ((prev_insn->dead_end ||
+	     dead_end_function(file, insn_call_dest(prev_insn))) &&
 	    (insn->type == INSN_BUG ||
 	     (insn->type == INSN_JUMP_UNCONDITIONAL &&
 	      insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -62,10 +62,12 @@ struct instruction {
 	s8 instr;
 
 	struct alt_group *alt_group;
-	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
-	struct reloc *jump_table;
+	union {
+		struct symbol *_call_dest;
+		struct reloc *_jump_table;
+	};
 	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 07/10] objtool: Fix overlapping alternatives
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (5 preceding siblings ...)
  2023-02-08 17:18 ` [PATCH 06/10] objtool: Union instruction::{call_dest,jump_table} Peter Zijlstra
@ 2023-02-08 17:18 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:18 ` [PATCH 08/10] x86: Fix FILL_RETURN_BUFFER Peter Zijlstra
                   ` (6 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:18 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

Things like ALTERNATIVE_{2,3}() generate multiple alternatives on the
same place, objtool would override the first orig_alt_group with the
second (or third), failing to check the CFI among all the different
variants.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c |   69 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 26 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1732,36 +1732,49 @@ static int handle_group_alt(struct objto
 			    struct instruction *orig_insn,
 			    struct instruction **new_insn)
 {
-	struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+	struct instruction *last_new_insn = NULL, *insn, *nop = NULL;
 	struct alt_group *orig_alt_group, *new_alt_group;
 	unsigned long dest_off;
 
-
-	orig_alt_group = malloc(sizeof(*orig_alt_group));
+	orig_alt_group = orig_insn->alt_group;
 	if (!orig_alt_group) {
-		WARN("malloc failed");
-		return -1;
-	}
-	orig_alt_group->cfi = calloc(special_alt->orig_len,
-				     sizeof(struct cfi_state *));
-	if (!orig_alt_group->cfi) {
-		WARN("calloc failed");
-		return -1;
-	}
+		struct instruction *last_orig_insn = NULL;
 
-	last_orig_insn = NULL;
-	insn = orig_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
-			break;
+		orig_alt_group = malloc(sizeof(*orig_alt_group));
+		if (!orig_alt_group) {
+			WARN("malloc failed");
+			return -1;
+		}
+		orig_alt_group->cfi = calloc(special_alt->orig_len,
+					     sizeof(struct cfi_state *));
+		if (!orig_alt_group->cfi) {
+			WARN("calloc failed");
+			return -1;
+		}
 
-		insn->alt_group = orig_alt_group;
-		last_orig_insn = insn;
-	}
-	orig_alt_group->orig_group = NULL;
-	orig_alt_group->first_insn = orig_insn;
-	orig_alt_group->last_insn = last_orig_insn;
+		insn = orig_insn;
+		sec_for_each_insn_from(file, insn) {
+			if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+				break;
 
+			insn->alt_group = orig_alt_group;
+			last_orig_insn = insn;
+		}
+		orig_alt_group->orig_group = NULL;
+		orig_alt_group->first_insn = orig_insn;
+		orig_alt_group->last_insn = last_orig_insn;
+	} else {
+		if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
+		    orig_alt_group->first_insn->offset != special_alt->orig_len) {
+			WARN_FUNC("weirdly overlapping alternative! %ld != %d",
+				  orig_insn->sec, orig_insn->offset,
+				  orig_alt_group->last_insn->offset +
+				  orig_alt_group->last_insn->len -
+				  orig_alt_group->first_insn->offset,
+				  special_alt->orig_len);
+			return -1;
+		}
+	}
 
 	new_alt_group = malloc(sizeof(*new_alt_group));
 	if (!new_alt_group) {
@@ -1836,7 +1849,7 @@ static int handle_group_alt(struct objto
 
 		dest_off = arch_jump_destination(insn);
 		if (dest_off == special_alt->new_off + special_alt->new_len) {
-			insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
+			insn->jump_dest = next_insn_same_sec(file, orig_alt_group->last_insn);
 			if (!insn->jump_dest) {
 				WARN_FUNC("can't find alternative jump destination",
 					  insn->sec, insn->offset);
@@ -3214,8 +3227,12 @@ static int propagate_alt_cfi(struct objt
 		alt_cfi[group_off] = insn->cfi;
 	} else {
 		if (cficmp(alt_cfi[group_off], insn->cfi)) {
-			WARN_FUNC("stack layout conflict in alternatives",
-				  insn->sec, insn->offset);
+			struct alt_group *orig_group = insn->alt_group->orig_group ?: insn->alt_group;
+			struct instruction *orig = orig_group->first_insn;
+			char *where = offstr(insn->sec, insn->offset);
+			WARN_FUNC("stack layout conflict in alternatives: %s",
+				  orig->sec, orig->offset, where);
+			free(where);
 			return -1;
 		}
 	}



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 08/10] x86: Fix FILL_RETURN_BUFFER
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (6 preceding siblings ...)
  2023-02-08 17:18 ` [PATCH 07/10] objtool: Fix overlapping alternatives Peter Zijlstra
@ 2023-02-08 17:18 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:18 ` [PATCH 09/10] objtool: Remove instruction::list Peter Zijlstra
                   ` (5 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:18 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

With overlapping alternative validation fixed, objtool promptly
complains:

vmlinux.o: warning: objtool: __switch_to_asm+0x2c: stack layout conflict in alternatives: .altinstr_replacement+0x47

.rela.altinstructions:

000000000000009c  0000000200000002 R_X86_64_PC32          0000000000000000 .text + 16dc
00000000000000a0  0000000600000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + 3a
00000000000000a8  0000000200000002 R_X86_64_PC32          0000000000000000 .text + 16dc
00000000000000ac  0000000600000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + 66

.text:

00000000000016b0 <__switch_to_asm>:
    16b0:       f3 0f 1e fa             endbr64
    16b4:       55                      push   %rbp
    16b5:       53                      push   %rbx
    16b6:       41 54                   push   %r12
    16b8:       41 55                   push   %r13
    16ba:       41 56                   push   %r14
    16bc:       41 57                   push   %r15
    16be:       48 89 a7 18 0b 00 00    mov    %rsp,0xb18(%rdi)
    16c5:       48 8b a6 18 0b 00 00    mov    0xb18(%rsi),%rsp
    16cc:       48 8b 9e 28 05 00 00    mov    0x528(%rsi),%rbx
    16d3:       65 48 89 1c 25 00 00 00 00      mov    %rbx,%gs:0x0     16d8: R_X86_64_32S      fixed_percpu_data+0x28
    16dc:       eb 2a                   jmp    1708 <__switch_to_asm+0x58>
    16de:       90                      nop
    16df:       90                      nop
    16e0:       90                      nop
    16e1:       90                      nop
    16e2:       90                      nop
    16e3:       90                      nop
    16e4:       90                      nop
    16e5:       90                      nop
    16e6:       90                      nop
    16e7:       90                      nop
    16e8:       90                      nop
    16e9:       90                      nop
    16ea:       90                      nop
    16eb:       90                      nop
    16ec:       90                      nop
    16ed:       90                      nop
    16ee:       90                      nop
    16ef:       90                      nop
    16f0:       90                      nop
    16f1:       90                      nop
    16f2:       90                      nop
    16f3:       90                      nop
    16f4:       90                      nop
    16f5:       90                      nop
    16f6:       90                      nop
    16f7:       90                      nop
    16f8:       90                      nop
    16f9:       90                      nop
    16fa:       90                      nop
    16fb:       90                      nop
    16fc:       90                      nop
    16fd:       90                      nop
    16fe:       90                      nop
    16ff:       90                      nop
    1700:       90                      nop
    1701:       90                      nop
    1702:       90                      nop
    1703:       90                      nop
    1704:       90                      nop
    1705:       90                      nop
    1706:       90                      nop
    1707:       90                      nop
    1708:       41 5f                   pop    %r15
    170a:       41 5e                   pop    %r14
    170c:       41 5d                   pop    %r13
    170e:       41 5c                   pop    %r12
    1710:       5b                      pop    %rbx
    1711:       5d                      pop    %rbp
    1712:       e9 00 00 00 00          jmp    1717 <__switch_to_asm+0x67>      1713: R_X86_64_PLT32    __switch_to-0x4

.altinstr_replacement:

      3a:       49 c7 c4 10 00 00 00    mov    $0x10,%r12
      41:       e8 01 00 00 00          call   47 <.altinstr_replacement+0x47>
      46:       cc                      int3
      47:       e8 01 00 00 00          call   4d <.altinstr_replacement+0x4d>
      4c:       cc                      int3
      4d:       48 83 c4 10             add    $0x10,%rsp
      51:       49 ff cc                dec    %r12
      54:       75 eb                   jne    41 <.altinstr_replacement+0x41>
      56:       0f ae e8                lfence
      59:       65 48 c7 04 25 00 00 00 00 ff ff ff ff  movq   $0xffffffffffffffff,%gs:0x0      5e: R_X86_64_32S        pcpu_hot+0x10

      66:       e8 01 00 00 00          call   6c <.altinstr_replacement+0x6c>
      6b:       cc                      int3
      6c:       48 83 c4 08             add    $0x8,%rsp
      70:       0f ae e8                lfence

As can be seen from the two alternatives, when overlaid, the NOP after
the shorter (starting at 66) coinsides with the call at 47, leading to
conflicting CFI state for that instruction.

By offsetting the shorter alternative by 2 bytes, this alignment is
undone.

TODO: arguably objtool should be taught about the max nop length used
      by the kernel for tail padding, or unconditionally use JMP.d8 to
      not use the intervening bytes at all.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/nospec-branch.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -261,7 +261,7 @@
 .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
 	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
 		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
-		__stringify(__FILL_ONE_RETURN), \ftr2
+		__stringify(nop;nop;__FILL_ONE_RETURN), \ftr2
 
 .Lskip_rsb_\@:
 .endm



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 09/10] objtool: Remove instruction::list
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (7 preceding siblings ...)
  2023-02-08 17:18 ` [PATCH 08/10] x86: Fix FILL_RETURN_BUFFER Peter Zijlstra
@ 2023-02-08 17:18 ` Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  2023-02-08 17:18 ` [PATCH 10/10][HACK] objtool: Shrink reloc Peter Zijlstra
                   ` (4 subsequent siblings)
  13 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:18 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.

 struct instruction {
-	struct list_head           list;                 /*     0    16 */
-	struct hlist_node          hash;                 /*    16    16 */
-	struct list_head           call_node;            /*    32    16 */
-	struct section *           sec;                  /*    48     8 */
-	long unsigned int          offset;               /*    56     8 */
-	/* --- cacheline 1 boundary (64 bytes) --- */
-	long unsigned int          immediate;            /*    64     8 */
-	unsigned int               len;                  /*    72     4 */
-	u8                         type;                 /*    76     1 */
-
-	/* Bitfield combined with previous fields */
+	struct hlist_node          hash;                 /*     0    16 */
+	struct list_head           call_node;            /*    16    16 */
+	struct section *           sec;                  /*    32     8 */
+	long unsigned int          offset;               /*    40     8 */
+	long unsigned int          immediate;            /*    48     8 */
+	u8                         len;                  /*    56     1 */
+	u8                         prev_len;             /*    57     1 */
+	u8                         type;                 /*    58     1 */
+	s8                         instr;                /*    59     1 */
+	u32                        idx:8;                /*    60: 0  4 */
+	u32                        dead_end:1;           /*    60: 8  4 */
+	u32                        ignore:1;             /*    60: 9  4 */
+	u32                        ignore_alts:1;        /*    60:10  4 */
+	u32                        hint:1;               /*    60:11  4 */
+	u32                        save:1;               /*    60:12  4 */
+	u32                        restore:1;            /*    60:13  4 */
+	u32                        retpoline_safe:1;     /*    60:14  4 */
+	u32                        noendbr:1;            /*    60:15  4 */
+	u32                        entry:1;              /*    60:16  4 */
+	u32                        visited:4;            /*    60:17  4 */
+	u32                        no_reloc:1;           /*    60:21  4 */

-	u16                        dead_end:1;           /*    76: 8  2 */
-	u16                        ignore:1;             /*    76: 9  2 */
-	u16                        ignore_alts:1;        /*    76:10  2 */
-	u16                        hint:1;               /*    76:11  2 */
-	u16                        save:1;               /*    76:12  2 */
-	u16                        restore:1;            /*    76:13  2 */
-	u16                        retpoline_safe:1;     /*    76:14  2 */
-	u16                        noendbr:1;            /*    76:15  2 */
-	u16                        entry:1;              /*    78: 0  2 */
-	u16                        visited:4;            /*    78: 1  2 */
-	u16                        no_reloc:1;           /*    78: 5  2 */
+	/* XXX 10 bits hole, try to pack */

-	/* XXX 2 bits hole, try to pack */
-	/* Bitfield combined with next fields */
-
-	s8                         instr;                /*    79     1 */
-	struct alt_group *         alt_group;            /*    80     8 */
-	struct instruction *       jump_dest;            /*    88     8 */
-	struct instruction *       first_jump_src;       /*    96     8 */
+	/* --- cacheline 1 boundary (64 bytes) --- */
+	struct alt_group *         alt_group;            /*    64     8 */
+	struct instruction *       jump_dest;            /*    72     8 */
+	struct instruction *       first_jump_src;       /*    80     8 */
 	union {
-		struct symbol *    _call_dest;           /*   104     8 */
-		struct reloc *     _jump_table;          /*   104     8 */
-	};                                               /*   104     8 */
-	struct alternative *       alts;                 /*   112     8 */
-	struct symbol *            sym;                  /*   120     8 */
-	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct stack_op *          stack_ops;            /*   128     8 */
-	struct cfi_state *         cfi;                  /*   136     8 */
+		struct symbol *    _call_dest;           /*    88     8 */
+		struct reloc *     _jump_table;          /*    88     8 */
+	};                                               /*    88     8 */
+	struct alternative *       alts;                 /*    96     8 */
+	struct symbol *            sym;                  /*   104     8 */
+	struct stack_op *          stack_ops;            /*   112     8 */
+	struct cfi_state *         cfi;                  /*   120     8 */

-	/* size: 144, cachelines: 3, members: 28 */
-	/* sum members: 142 */
-	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
-	/* last cacheline: 16 bytes */
+	/* size: 128, cachelines: 2, members: 29 */
+	/* sum members: 124 */
+	/* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
 };


pre:	5:38.18 real,   213.25 user,    124.90 sys,     23449040 mem
post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c                   |  166 ++++++++++++++++++++------------
 tools/objtool/include/objtool/check.h   |   51 +++++----
 tools/objtool/include/objtool/objtool.h |    1 
 tools/objtool/objtool.c                 |    1 
 4 files changed, 133 insertions(+), 86 deletions(-)

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -47,27 +47,29 @@ struct instruction *find_insn(struct obj
 	return NULL;
 }
 
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
-					      struct instruction *insn)
+struct instruction *next_insn_same_sec(struct objtool_file *file,
+				       struct instruction *insn)
 {
-	struct instruction *next = list_next_entry(insn, list);
+	if (insn->idx == INSN_CHUNK_MAX)
+		return find_insn(file, insn->sec, insn->offset + insn->len);
 
-	if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+	insn++;
+	if (!insn->len)
 		return NULL;
 
-	return next;
+	return insn;
 }
 
 static struct instruction *next_insn_same_func(struct objtool_file *file,
 					       struct instruction *insn)
 {
-	struct instruction *next = list_next_entry(insn, list);
+	struct instruction *next = next_insn_same_sec(file, insn);
 	struct symbol *func = insn_func(insn);
 
 	if (!func)
 		return NULL;
 
-	if (&next->list != &file->insn_list && insn_func(next) == func)
+	if (next && insn_func(next) == func)
 		return next;
 
 	/* Check if we're already in the subfunction: */
@@ -78,17 +80,35 @@ static struct instruction *next_insn_sam
 	return find_insn(file, func->cfunc->sec, func->cfunc->offset);
 }
 
+static struct instruction *prev_insn_same_sec(struct objtool_file *file,
+					      struct instruction *insn)
+{
+	if (insn->idx == 0) {
+		if (insn->prev_len)
+			return find_insn(file, insn->sec, insn->offset - insn->prev_len);
+		return NULL;
+	}
+
+	return insn - 1;
+}
+
 static struct instruction *prev_insn_same_sym(struct objtool_file *file,
-					       struct instruction *insn)
+					      struct instruction *insn)
 {
-	struct instruction *prev = list_prev_entry(insn, list);
+	struct instruction *prev = prev_insn_same_sec(file, insn);
 
-	if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn))
+	if (prev && insn_func(prev) == insn_func(insn))
 		return prev;
 
 	return NULL;
 }
 
+#define for_each_insn(file, insn)					\
+	for (struct section *__sec, *__fake = (struct section *)1;	\
+	     __fake; __fake = NULL)					\
+		for_each_sec(file, __sec)				\
+			sec_for_each_insn(file, __sec, insn)
+
 #define func_for_each_insn(file, func, insn)				\
 	for (insn = find_insn(file, func->sec, func->offset);		\
 	     insn;							\
@@ -96,16 +116,13 @@ static struct instruction *prev_insn_sam
 
 #define sym_for_each_insn(file, sym, insn)				\
 	for (insn = find_insn(file, sym->sec, sym->offset);		\
-	     insn && &insn->list != &file->insn_list &&			\
-		insn->sec == sym->sec &&				\
-		insn->offset < sym->offset + sym->len;			\
-	     insn = list_next_entry(insn, list))
+	     insn && insn->offset < sym->offset + sym->len;		\
+	     insn = next_insn_same_sec(file, insn))
 
 #define sym_for_each_insn_continue_reverse(file, sym, insn)		\
-	for (insn = list_prev_entry(insn, list);			\
-	     &insn->list != &file->insn_list &&				\
-		insn->sec == sym->sec && insn->offset >= sym->offset;	\
-	     insn = list_prev_entry(insn, list))
+	for (insn = prev_insn_same_sec(file, insn);			\
+	     insn && insn->offset >= sym->offset;			\
+	     insn = prev_insn_same_sec(file, insn))
 
 #define sec_for_each_insn_from(file, insn)				\
 	for (; insn; insn = next_insn_same_sec(file, insn))
@@ -383,6 +400,9 @@ static int decode_instructions(struct ob
 	int ret;
 
 	for_each_sec(file, sec) {
+		struct instruction *insns = NULL;
+		u8 prev_len = 0;
+		u8 idx = 0;
 
 		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
 			continue;
@@ -407,22 +427,31 @@ static int decode_instructions(struct ob
 			sec->init = true;
 
 		for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
-			insn = malloc(sizeof(*insn));
-			if (!insn) {
-				WARN("malloc failed");
-				return -1;
+			if (!insns || idx == INSN_CHUNK_MAX) {
+				insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
+				if (!insns) {
+					WARN("malloc failed");
+					return -1;
+				}
+				idx = 0;
+			} else {
+				idx++;
 			}
-			memset(insn, 0, sizeof(*insn));
-			INIT_LIST_HEAD(&insn->call_node);
+			insn = &insns[idx];
+			insn->idx = idx;
 
+			INIT_LIST_HEAD(&insn->call_node);
 			insn->sec = sec;
 			insn->offset = offset;
+			insn->prev_len = prev_len;
 
 			ret = arch_decode_instruction(file, sec, offset,
 						      sec->sh.sh_size - offset,
 						      insn);
 			if (ret)
-				goto err;
+				return ret;
+
+			prev_len = insn->len;
 
 			/*
 			 * By default, "ud2" is a dead end unless otherwise
@@ -433,10 +462,11 @@ static int decode_instructions(struct ob
 				insn->dead_end = true;
 
 			hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
-			list_add_tail(&insn->list, &file->insn_list);
 			nr_insns++;
 		}
 
+//		printf("%s: last chunk used: %d\n", sec->name, (int)idx);
+
 		list_for_each_entry(func, &sec->symbol_list, list) {
 			if (func->type != STT_NOTYPE && func->type != STT_FUNC)
 				continue;
@@ -479,10 +509,6 @@ static int decode_instructions(struct ob
 		printf("nr_insns: %lu\n", nr_insns);
 
 	return 0;
-
-err:
-	free(insn);
-	return ret;
 }
 
 /*
@@ -597,7 +623,7 @@ static int add_dead_ends(struct objtool_
 		}
 		insn = find_insn(file, reloc->sym->sec, reloc->addend);
 		if (insn)
-			insn = list_prev_entry(insn, list);
+			insn = prev_insn_same_sec(file, insn);
 		else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
 			insn = find_last_insn(file, reloc->sym->sec);
 			if (!insn) {
@@ -632,7 +658,7 @@ static int add_dead_ends(struct objtool_
 		}
 		insn = find_insn(file, reloc->sym->sec, reloc->addend);
 		if (insn)
-			insn = list_prev_entry(insn, list);
+			insn = prev_insn_same_sec(file, insn);
 		else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
 			insn = find_last_insn(file, reloc->sym->sec);
 			if (!insn) {
@@ -1763,6 +1789,7 @@ static int handle_group_alt(struct objto
 		orig_alt_group->orig_group = NULL;
 		orig_alt_group->first_insn = orig_insn;
 		orig_alt_group->last_insn = last_orig_insn;
+		orig_alt_group->nop = NULL;
 	} else {
 		if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
 		    orig_alt_group->first_insn->offset != special_alt->orig_len) {
@@ -1864,12 +1891,11 @@ static int handle_group_alt(struct objto
 		return -1;
 	}
 
-	if (nop)
-		list_add(&nop->list, &last_new_insn->list);
 end:
 	new_alt_group->orig_group = orig_alt_group;
 	new_alt_group->first_insn = *new_insn;
-	new_alt_group->last_insn = nop ? : last_new_insn;
+	new_alt_group->last_insn = last_new_insn;
+	new_alt_group->nop = nop;
 	new_alt_group->cfi = orig_alt_group->cfi;
 	return 0;
 }
@@ -1919,7 +1945,7 @@ static int handle_jump_alt(struct objtoo
 	else
 		file->jl_long++;
 
-	*new_insn = list_next_entry(orig_insn, list);
+	*new_insn = next_insn_same_sec(file, orig_insn);
 	return 0;
 }
 
@@ -3504,11 +3530,28 @@ static struct instruction *next_insn_to_
 	 * Simulate the fact that alternatives are patched in-place.  When the
 	 * end of a replacement alt_group is reached, redirect objtool flow to
 	 * the end of the original alt_group.
+	 *
+	 * insn->alts->insn -> alt_group->first_insn
+	 *		       ...
+	 *		       alt_group->last_insn
+	 *		       [alt_group->nop]      -> next(orig_group->last_insn)
 	 */
-	if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
-		return next_insn_same_sec(file, alt_group->orig_group->last_insn);
+	if (alt_group) {
+		if (alt_group->nop) {
+			/* ->nop implies ->orig_group */
+			if (insn == alt_group->last_insn)
+				return alt_group->nop;
+			if (insn == alt_group->nop)
+				goto next_orig;
+		}
+		if (insn == alt_group->last_insn && alt_group->orig_group)
+			goto next_orig;
+	}
 
 	return next_insn_same_sec(file, insn);
+
+next_orig:
+	return next_insn_same_sec(file, alt_group->orig_group->last_insn);
 }
 
 /*
@@ -3759,11 +3802,25 @@ static int validate_branch(struct objtoo
 	return 0;
 }
 
+static int validate_unwind_hint(struct objtool_file *file,
+				  struct instruction *insn,
+				  struct insn_state *state)
+{
+	if (insn->hint && !insn->visited && !insn->ignore) {
+		int ret = validate_branch(file, insn_func(insn), insn, *state);
+		if (ret && opts.backtrace)
+			BT_FUNC("<=== (hint)", insn);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
 {
 	struct instruction *insn;
 	struct insn_state state;
-	int ret, warnings = 0;
+	int warnings = 0;
 
 	if (!file->hints)
 		return 0;
@@ -3771,22 +3828,11 @@ static int validate_unwind_hints(struct
 	init_insn_state(file, &state, sec);
 
 	if (sec) {
-		insn = find_insn(file, sec, 0);
-		if (!insn)
-			return 0;
+		sec_for_each_insn(file, sec, insn)
+			warnings += validate_unwind_hint(file, insn, &state);
 	} else {
-		insn = list_first_entry(&file->insn_list, typeof(*insn), list);
-	}
-
-	while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
-		if (insn->hint && !insn->visited && !insn->ignore) {
-			ret = validate_branch(file, insn_func(insn), insn, state);
-			if (ret && opts.backtrace)
-				BT_FUNC("<=== (hint)", insn);
-			warnings += ret;
-		}
-
-		insn = list_next_entry(insn, list);
+		for_each_insn(file, insn)
+			warnings += validate_unwind_hint(file, insn, &state);
 	}
 
 	return warnings;
@@ -4052,7 +4098,7 @@ static bool ignore_unreachable_insn(stru
 	 *
 	 * It may also insert a UD2 after calling a __noreturn function.
 	 */
-	prev_insn = list_prev_entry(insn, list);
+	prev_insn = prev_insn_same_sec(file, insn);
 	if ((prev_insn->dead_end ||
 	     dead_end_function(file, insn_call_dest(prev_insn))) &&
 	    (insn->type == INSN_BUG ||
@@ -4084,7 +4130,7 @@ static bool ignore_unreachable_insn(stru
 		if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len)
 			break;
 
-		insn = list_next_entry(insn, list);
+		insn = next_insn_same_sec(file, insn);
 	}
 
 	return false;
@@ -4097,10 +4143,10 @@ static int add_prefix_symbol(struct objt
 		return 0;
 
 	for (;;) {
-		struct instruction *prev = list_prev_entry(insn, list);
+		struct instruction *prev = prev_insn_same_sec(file, insn);
 		u64 offset;
 
-		if (&prev->list == &file->insn_list)
+		if (!prev)
 			break;
 
 		if (prev->type != INSN_NOP)
@@ -4493,7 +4539,7 @@ int check(struct objtool_file *file)
 
 	warnings += ret;
 
-	if (list_empty(&file->insn_list))
+	if (!nr_insns)
 		goto out;
 
 	if (opts.retpoline) {
@@ -4602,7 +4648,7 @@ int check(struct objtool_file *file)
 		warnings += ret;
 	}
 
-	if (opts.orc && !list_empty(&file->insn_list)) {
+	if (opts.orc && nr_insns) {
 		ret = orc_create(file);
 		if (ret < 0)
 			goto out;
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -27,7 +27,7 @@ struct alt_group {
 	struct alt_group *orig_group;
 
 	/* First and last instructions in the group */
-	struct instruction *first_insn, *last_insn;
+	struct instruction *first_insn, *last_insn, *nop;
 
 	/*
 	 * Byte-offset-addressed len-sized array of pointers to CFI structs.
@@ -36,31 +36,36 @@ struct alt_group {
 	struct cfi_state **cfi;
 };
 
+#define INSN_CHUNK_BITS		8
+#define INSN_CHUNK_SIZE		(1 << INSN_CHUNK_BITS)
+#define INSN_CHUNK_MAX		(INSN_CHUNK_SIZE - 1)
+
 struct instruction {
-	struct list_head list;
 	struct hlist_node hash;
 	struct list_head call_node;
 	struct section *sec;
 	unsigned long offset;
 	unsigned long immediate;
-	unsigned int len;
-	u8 type;
-
-	u16 dead_end		: 1,
-	   ignore		: 1,
-	   ignore_alts		: 1,
-	   hint			: 1,
-	   save			: 1,
-	   restore		: 1,
-	   retpoline_safe	: 1,
-	   noendbr		: 1,
-	   entry		: 1,
-	   visited		: 4,
-	   no_reloc		: 1;
-		/* 2 bit hole */
 
+	u8 len;
+	u8 prev_len;
+	u8 type;
 	s8 instr;
 
+	u32 idx			: INSN_CHUNK_BITS,
+	    dead_end		: 1,
+	    ignore		: 1,
+	    ignore_alts		: 1,
+	    hint		: 1,
+	    save		: 1,
+	    restore		: 1,
+	    retpoline_safe	: 1,
+	    noendbr		: 1,
+	    entry		: 1,
+	    visited		: 4,
+	    no_reloc		: 1;
+		/* 10 bit hole */
+
 	struct alt_group *alt_group;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
@@ -109,13 +114,11 @@ static inline bool is_jump(struct instru
 struct instruction *find_insn(struct objtool_file *file,
 			      struct section *sec, unsigned long offset);
 
-#define for_each_insn(file, insn)					\
-	list_for_each_entry(insn, &file->insn_list, list)
+struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruction *insn);
 
-#define sec_for_each_insn(file, sec, insn)				\
-	for (insn = find_insn(file, sec, 0);				\
-	     insn && &insn->list != &file->insn_list &&			\
-			insn->sec == sec;				\
-	     insn = list_next_entry(insn, list))
+#define sec_for_each_insn(file, _sec, insn)				\
+	for (insn = find_insn(file, _sec, 0);				\
+	     insn && insn->sec == _sec;					\
+	     insn = next_insn_same_sec(file, insn))
 
 #endif /* _CHECK_H */
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -21,7 +21,6 @@ struct pv_state {
 
 struct objtool_file {
 	struct elf *elf;
-	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 20);
 	struct list_head retpoline_call_list;
 	struct list_head return_thunk_list;
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -99,7 +99,6 @@ struct objtool_file *objtool_open_read(c
 		return NULL;
 	}
 
-	INIT_LIST_HEAD(&file.insn_list);
 	hash_init(file.insn_hash);
 	INIT_LIST_HEAD(&file.retpoline_call_list);
 	INIT_LIST_HEAD(&file.return_thunk_list);



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 10/10][HACK] objtool: Shrink reloc
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (8 preceding siblings ...)
  2023-02-08 17:18 ` [PATCH 09/10] objtool: Remove instruction::list Peter Zijlstra
@ 2023-02-08 17:18 ` Peter Zijlstra
  2023-02-08 18:35 ` [PATCH 00/10] objtool: Honey, I shrunk the instruction Nathan Chancellor
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-08 17:18 UTC (permalink / raw)
  To: x86, jpoimboe, linux; +Cc: linux-kernel, peterz

Glorious hack, do not merge. Good for another ~850M of allyesconfig
savings.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/include/objtool/elf.h |   12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -71,17 +71,23 @@ struct reloc {
 	union {
 		GElf_Rela rela;
 		GElf_Rel  rel;
+		struct {
+			u64 offset;
+			u64 __bar;
+			s64 addend;
+		};
 	};
 	struct section *sec;
 	struct symbol *sym;
 	struct list_head sym_reloc_entry;
-	unsigned long offset;
-	unsigned int type;
-	s64 addend;
 	int idx;
+	unsigned short type;
 	bool jump_table_start;
 };
 
+static_assert(offsetof(struct reloc, rela.r_offset) == offsetof(struct reloc, offset));
+static_assert(offsetof(struct reloc, rela.r_addend) == offsetof(struct reloc, addend));
+
 #define ELF_HASH_BITS	20
 
 struct elf {



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 00/10] objtool: Honey, I shrunk the instruction
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (9 preceding siblings ...)
  2023-02-08 17:18 ` [PATCH 10/10][HACK] objtool: Shrink reloc Peter Zijlstra
@ 2023-02-08 18:35 ` Nathan Chancellor
  2023-02-08 20:22 ` Damian Tometzki
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 37+ messages in thread
From: Nathan Chancellor @ 2023-02-08 18:35 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jpoimboe, linux, linux-kernel

On Wed, Feb 08, 2023 at 06:17:56PM +0100, Peter Zijlstra wrote:
> Hi,
> 
> Boris complained he could no longer build allyesconfig on his 32G desktop
> machine without having OOM terminate either objtool or chrome.
> 
> After talking about these patches on IRC, Nathan mentioned the linux-clang CI
> was also having trouble of recent, and these patches appear to make it happy
> again.
> 
> In total these patches shrink an allyesconfig run by about 6G:
> 
> pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
> post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem
> 
> Also at:
> 
>   https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=objtool/core

For patches 1-9:

Tested-by: Nathan Chancellor <nathan@kernel.org> # build only

Prior to this series:

    [INFO] Memory used: 25.09GB

After this series:

    [INFO] Memory used: 19.27GB

Our builds on TuxSuite were consistenly timing out after four hours and
they had no problem passing with this series (the worst time was 2.2h,
which is line with the VM specs that they use I believe):

https://tuxapi.tuxsuite.com/v1/groups/clangbuiltlinux/projects/nathan/plans/2LQbNuWRo3Xf62Yg3SINuA9d7cR

Thanks a lot!

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 00/10] objtool: Honey, I shrunk the instruction
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (10 preceding siblings ...)
  2023-02-08 18:35 ` [PATCH 00/10] objtool: Honey, I shrunk the instruction Nathan Chancellor
@ 2023-02-08 20:22 ` Damian Tometzki
  2023-02-09 10:22   ` Peter Zijlstra
  2023-02-09 19:57 ` Josh Poimboeuf
  2023-02-10  1:50 ` [PATCH 00/10] objtool: Honey, I shrunk the instruction Thomas Weißschuh
  13 siblings, 1 reply; 37+ messages in thread
From: Damian Tometzki @ 2023-02-08 20:22 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jpoimboe, linux, linux-kernel

On Wed, 08. Feb 18:17, Peter Zijlstra wrote:
> Hi,
> 
> Boris complained he could no longer build allyesconfig on his 32G desktop
> machine without having OOM terminate either objtool or chrome.
> 
> After talking about these patches on IRC, Nathan mentioned the linux-clang CI
> was also having trouble of recent, and these patches appear to make it happy
> again.
> 
> In total these patches shrink an allyesconfig run by about 6G:
> 
> pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
> post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem
> 
> Also at:
> 
>   https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=objtool/core
> 
Hello Peter,

with clang-17 the build failed: 
In file included from weak.c:10:
In file included from /home/damian/kernel/linux/tools/objtool/include/objtool/objtool.h:13:
/home/damian/kernel/linux/tools/objtool/include/objtool/elf.h:88:86: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
static_assert(offsetof(struct reloc, rela.r_offset) == offsetof(struct reloc, offset));
                                                                                     ^
                                                                                     , ""
In file included from arch/x86/special.c:4:
In file included from /home/damian/kernel/linux/tools/objtool/include/objtool/special.h:10:
In file included from /home/damian/kernel/linux/tools/objtool/include/objtool/check.h:11/home/damian/kernel/linux/tools/objtool/include/objtool/elf.h:
In file included from :89:86: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
/home/damian/kernel/linux/tools/objtool/include/objtool/arch.h:11:
In file included from static_assert(offsetof(struct reloc, rela.r_addend) == offsetof(struct reloc, addend));
                                                                                     ^
                                                                                     , ""
/home/damian/kernel/linux/tools/objtool/include/objtool/objtool.h:13:
/home/damian/kernel/linux/tools/objtool/include/objtool/elf.h:88:86: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
static_assert(offsetof(struct reloc, rela.r_offset) == offsetof(struct reloc, offset));
                                                                                     ^
                                                                                     , ""
/home/damian/kernel/linux/tools/objtool/include/objtool/elf.h:89:86: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
static_assert(offsetof(struct reloc, rela.r_addend) == offsetof(struct reloc, addend));
                                                                                     ^
                                                                                     , ""
2 errors generated.
In file included from builtin-check.c:10:
In file included from /home/damian/kernel/linux/tools/objtool/include/objtool/objtool.h:13:
/home/damian/kernel/linux/tools/objtool/include/objtool/elf.h:88:86: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
static_assert(offsetof(struct reloc, rela.r_offset) == offsetof(struct reloc, offset));
...

Best regards
Damian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 00/10] objtool: Honey, I shrunk the instruction
  2023-02-08 20:22 ` Damian Tometzki
@ 2023-02-09 10:22   ` Peter Zijlstra
  2023-02-09 10:56     ` Damian Tometzki
  0 siblings, 1 reply; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-09 10:22 UTC (permalink / raw)
  To: x86, jpoimboe, linux, linux-kernel

On Wed, Feb 08, 2023 at 09:22:02PM +0100, Damian Tometzki wrote:
> On Wed, 08. Feb 18:17, Peter Zijlstra wrote:
> > Hi,
> > 
> > Boris complained he could no longer build allyesconfig on his 32G desktop
> > machine without having OOM terminate either objtool or chrome.
> > 
> > After talking about these patches on IRC, Nathan mentioned the linux-clang CI
> > was also having trouble of recent, and these patches appear to make it happy
> > again.
> > 
> > In total these patches shrink an allyesconfig run by about 6G:
> > 
> > pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
> > post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem
> > 
> > Also at:
> > 
> >   https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=objtool/core
> > 
> Hello Peter,
> 
> with clang-17 the build failed: 
> In file included from weak.c:10:
> In file included from /home/damian/kernel/linux/tools/objtool/include/objtool/objtool.h:13:
> /home/damian/kernel/linux/tools/objtool/include/objtool/elf.h:88:86: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
> static_assert(offsetof(struct reloc, rela.r_offset) == offsetof(struct reloc, offset));

Oh urgh. Apparently the kernel wrapper went missing in this userspace
project :-)

include/linux/build_bug.h:#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)

Anyway, it's that last patch and I meant to do that in a slightly less
horrid way :-)

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 00/10] objtool: Honey, I shrunk the instruction
  2023-02-09 10:22   ` Peter Zijlstra
@ 2023-02-09 10:56     ` Damian Tometzki
  0 siblings, 0 replies; 37+ messages in thread
From: Damian Tometzki @ 2023-02-09 10:56 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jpoimboe, linux, linux-kernel

On Thu, 09. Feb 11:22, Peter Zijlstra wrote:
> On Wed, Feb 08, 2023 at 09:22:02PM +0100, Damian Tometzki wrote:
> > On Wed, 08. Feb 18:17, Peter Zijlstra wrote:
> > > Hi,
> > > 
> > > Boris complained he could no longer build allyesconfig on his 32G desktop
> > > machine without having OOM terminate either objtool or chrome.
> > > 
> > > After talking about these patches on IRC, Nathan mentioned the linux-clang CI
> > > was also having trouble of recent, and these patches appear to make it happy
> > > again.
> > > 
> > > In total these patches shrink an allyesconfig run by about 6G:
> > > 
> > > pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
> > > post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem
> > > 
> > > Also at:
> > > 
> > >   https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=objtool/core
> > > 
> > Hello Peter,
> > 
> > with clang-17 the build failed: 
> > In file included from weak.c:10:
> > In file included from /home/damian/kernel/linux/tools/objtool/include/objtool/objtool.h:13:
> > /home/damian/kernel/linux/tools/objtool/include/objtool/elf.h:88:86: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
> > static_assert(offsetof(struct reloc, rela.r_offset) == offsetof(struct reloc, offset));
> 
> Oh urgh. Apparently the kernel wrapper went missing in this userspace
> project :-)
> 
> include/linux/build_bug.h:#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
> 
> Anyway, it's that last patch and I meant to do that in a slightly less
> horrid way :-)

Hello Peter,

here a shortipossible fix: 
Then the build with clang works. 

diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 83b100c1e7f6..b7c8b476db95 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -31,7 +31,7 @@ INCLUDES := -I$(srctree)/tools/include \
            -I$(LIBSUBCMD_OUTPUT)/include
 # Note, EXTRA_WARNINGS here was determined for CC and not HOSTCC, it
 # is passed here to match a legacy behavior.
-WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
+WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs -Wno-c2x-extensions
 OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH 00/10] objtool: Honey, I shrunk the instruction
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (11 preceding siblings ...)
  2023-02-08 20:22 ` Damian Tometzki
@ 2023-02-09 19:57 ` Josh Poimboeuf
  2023-02-10  8:59   ` Peter Zijlstra
  2023-02-10  1:50 ` [PATCH 00/10] objtool: Honey, I shrunk the instruction Thomas Weißschuh
  13 siblings, 1 reply; 37+ messages in thread
From: Josh Poimboeuf @ 2023-02-09 19:57 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jpoimboe, linux, linux-kernel

On Wed, Feb 08, 2023 at 06:17:56PM +0100, Peter Zijlstra wrote:
> Hi,
> 
> Boris complained he could no longer build allyesconfig on his 32G desktop
> machine without having OOM terminate either objtool or chrome.
> 
> After talking about these patches on IRC, Nathan mentioned the linux-clang CI
> was also having trouble of recent, and these patches appear to make it happy
> again.
> 
> In total these patches shrink an allyesconfig run by about 6G:
> 
> pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
> post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem
> 
> Also at:
> 
>   https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=objtool/core

For patches 1-9:

Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>

To make the last patch legit we could just have reloc_offset(),
reloc_addend(), reloc_type() helpers to abstract access to rela/rel
fields.

And I'm sure there are other savings we could do to struct reloc, like
single-linked lists, bitfields, etc.

Let me know if you want me to do it.

-- 
Josh

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 00/10] objtool: Honey, I shrunk the instruction
  2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
                   ` (12 preceding siblings ...)
  2023-02-09 19:57 ` Josh Poimboeuf
@ 2023-02-10  1:50 ` Thomas Weißschuh
  13 siblings, 0 replies; 37+ messages in thread
From: Thomas Weißschuh @ 2023-02-10  1:50 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, jpoimboe, linux-kernel

On Wed, Feb 08, 2023 at 06:17:56PM +0100, Peter Zijlstra wrote:
> Hi,
> 
> Boris complained he could no longer build allyesconfig on his 32G desktop
> machine without having OOM terminate either objtool or chrome.
> 
> After talking about these patches on IRC, Nathan mentioned the linux-clang CI
> was also having trouble of recent, and these patches appear to make it happy
> again.
> 
> In total these patches shrink an allyesconfig run by about 6G:
> 
> pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
> post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem

Thanks!
This makes a huge difference on my memory-starved travel machine.
Seems to be back to about 6.0 levels.

Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run

> Also at:
> 
>   https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=objtool/core
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 00/10] objtool: Honey, I shrunk the instruction
  2023-02-09 19:57 ` Josh Poimboeuf
@ 2023-02-10  8:59   ` Peter Zijlstra
  2023-02-10 13:47     ` [PATCH 00/10] objtool: Honey, I shrunk the instruction^Wreloc Peter Zijlstra
  0 siblings, 1 reply; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-10  8:59 UTC (permalink / raw)
  To: Josh Poimboeuf; +Cc: x86, jpoimboe, linux, linux-kernel

On Thu, Feb 09, 2023 at 11:57:40AM -0800, Josh Poimboeuf wrote:

> To make the last patch legit we could just have reloc_offset(),
> reloc_addend(), reloc_type() helpers to abstract access to rela/rel
> fields.
> 
> And I'm sure there are other savings we could do to struct reloc, like
> single-linked lists, bitfields, etc.
> 
> Let me know if you want me to do it.

I'll make a start today -- I've got snot pouring out of me, so thinking
isn't going to happen, might as well just do the busy work :-)

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 00/10] objtool: Honey, I shrunk the instruction^Wreloc
  2023-02-10  8:59   ` Peter Zijlstra
@ 2023-02-10 13:47     ` Peter Zijlstra
  0 siblings, 0 replies; 37+ messages in thread
From: Peter Zijlstra @ 2023-02-10 13:47 UTC (permalink / raw)
  To: Josh Poimboeuf; +Cc: x86, jpoimboe, linux, linux-kernel

On Fri, Feb 10, 2023 at 09:59:05AM +0100, Peter Zijlstra wrote:
> On Thu, Feb 09, 2023 at 11:57:40AM -0800, Josh Poimboeuf wrote:
> 
> > To make the last patch legit we could just have reloc_offset(),
> > reloc_addend(), reloc_type() helpers to abstract access to rela/rel
> > fields.
> > 
> > And I'm sure there are other savings we could do to struct reloc, like
> > single-linked lists, bitfields, etc.
> > 
> > Let me know if you want me to do it.
> 
> I'll make a start today -- I've got snot pouring out of me, so thinking
> isn't going to happen, might as well just do the busy work :-)

(lightly tested...)

Shrinks reloc from 120 to 80 bytes:

  https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=objtool/shrink-reloc

pre:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem
post:	4:12.71 real,   188.21 user,    64.49 sys,      18144848 mem

(still running on the exact same allyesconfig image)

I can probably get rid of symbol::reloc_list if I rewrite that whole
symtable trainwreck and I think I can also get rid of
instruction::alt_group, but both of those are a bit more work.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Remove instruction::list
  2023-02-08 17:18 ` [PATCH 09/10] objtool: Remove instruction::list Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     5a10ca6e6400d472553fda72e51e5ba598d24e1f
Gitweb:        https://git.kernel.org/tip/5a10ca6e6400d472553fda72e51e5ba598d24e1f
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:05 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:09 +01:00

objtool: Remove instruction::list

Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.

 struct instruction {
-	struct list_head           list;                 /*     0    16 */
-	struct hlist_node          hash;                 /*    16    16 */
-	struct list_head           call_node;            /*    32    16 */
-	struct section *           sec;                  /*    48     8 */
-	long unsigned int          offset;               /*    56     8 */
-	/* --- cacheline 1 boundary (64 bytes) --- */
-	long unsigned int          immediate;            /*    64     8 */
-	unsigned int               len;                  /*    72     4 */
-	u8                         type;                 /*    76     1 */
-
-	/* Bitfield combined with previous fields */
+	struct hlist_node          hash;                 /*     0    16 */
+	struct list_head           call_node;            /*    16    16 */
+	struct section *           sec;                  /*    32     8 */
+	long unsigned int          offset;               /*    40     8 */
+	long unsigned int          immediate;            /*    48     8 */
+	u8                         len;                  /*    56     1 */
+	u8                         prev_len;             /*    57     1 */
+	u8                         type;                 /*    58     1 */
+	s8                         instr;                /*    59     1 */
+	u32                        idx:8;                /*    60: 0  4 */
+	u32                        dead_end:1;           /*    60: 8  4 */
+	u32                        ignore:1;             /*    60: 9  4 */
+	u32                        ignore_alts:1;        /*    60:10  4 */
+	u32                        hint:1;               /*    60:11  4 */
+	u32                        save:1;               /*    60:12  4 */
+	u32                        restore:1;            /*    60:13  4 */
+	u32                        retpoline_safe:1;     /*    60:14  4 */
+	u32                        noendbr:1;            /*    60:15  4 */
+	u32                        entry:1;              /*    60:16  4 */
+	u32                        visited:4;            /*    60:17  4 */
+	u32                        no_reloc:1;           /*    60:21  4 */

-	u16                        dead_end:1;           /*    76: 8  2 */
-	u16                        ignore:1;             /*    76: 9  2 */
-	u16                        ignore_alts:1;        /*    76:10  2 */
-	u16                        hint:1;               /*    76:11  2 */
-	u16                        save:1;               /*    76:12  2 */
-	u16                        restore:1;            /*    76:13  2 */
-	u16                        retpoline_safe:1;     /*    76:14  2 */
-	u16                        noendbr:1;            /*    76:15  2 */
-	u16                        entry:1;              /*    78: 0  2 */
-	u16                        visited:4;            /*    78: 1  2 */
-	u16                        no_reloc:1;           /*    78: 5  2 */
+	/* XXX 10 bits hole, try to pack */

-	/* XXX 2 bits hole, try to pack */
-	/* Bitfield combined with next fields */
-
-	s8                         instr;                /*    79     1 */
-	struct alt_group *         alt_group;            /*    80     8 */
-	struct instruction *       jump_dest;            /*    88     8 */
-	struct instruction *       first_jump_src;       /*    96     8 */
+	/* --- cacheline 1 boundary (64 bytes) --- */
+	struct alt_group *         alt_group;            /*    64     8 */
+	struct instruction *       jump_dest;            /*    72     8 */
+	struct instruction *       first_jump_src;       /*    80     8 */
 	union {
-		struct symbol *    _call_dest;           /*   104     8 */
-		struct reloc *     _jump_table;          /*   104     8 */
-	};                                               /*   104     8 */
-	struct alternative *       alts;                 /*   112     8 */
-	struct symbol *            sym;                  /*   120     8 */
-	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct stack_op *          stack_ops;            /*   128     8 */
-	struct cfi_state *         cfi;                  /*   136     8 */
+		struct symbol *    _call_dest;           /*    88     8 */
+		struct reloc *     _jump_table;          /*    88     8 */
+	};                                               /*    88     8 */
+	struct alternative *       alts;                 /*    96     8 */
+	struct symbol *            sym;                  /*   104     8 */
+	struct stack_op *          stack_ops;            /*   112     8 */
+	struct cfi_state *         cfi;                  /*   120     8 */

-	/* size: 144, cachelines: 3, members: 28 */
-	/* sum members: 142 */
-	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
-	/* last cacheline: 16 bytes */
+	/* size: 128, cachelines: 2, members: 29 */
+	/* sum members: 124 */
+	/* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
 };

pre:	5:38.18 real,   213.25 user,    124.90 sys,     23449040 mem
post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
---
 tools/objtool/check.c                   | 166 ++++++++++++++---------
 tools/objtool/include/objtool/check.h   |  51 +++----
 tools/objtool/include/objtool/objtool.h |   1 +-
 tools/objtool/objtool.c                 |   1 +-
 4 files changed, 133 insertions(+), 86 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 7e9d3d3..b0b467d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -47,27 +47,29 @@ struct instruction *find_insn(struct objtool_file *file,
 	return NULL;
 }
 
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
-					      struct instruction *insn)
+struct instruction *next_insn_same_sec(struct objtool_file *file,
+				       struct instruction *insn)
 {
-	struct instruction *next = list_next_entry(insn, list);
+	if (insn->idx == INSN_CHUNK_MAX)
+		return find_insn(file, insn->sec, insn->offset + insn->len);
 
-	if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+	insn++;
+	if (!insn->len)
 		return NULL;
 
-	return next;
+	return insn;
 }
 
 static struct instruction *next_insn_same_func(struct objtool_file *file,
 					       struct instruction *insn)
 {
-	struct instruction *next = list_next_entry(insn, list);
+	struct instruction *next = next_insn_same_sec(file, insn);
 	struct symbol *func = insn_func(insn);
 
 	if (!func)
 		return NULL;
 
-	if (&next->list != &file->insn_list && insn_func(next) == func)
+	if (next && insn_func(next) == func)
 		return next;
 
 	/* Check if we're already in the subfunction: */
@@ -78,17 +80,35 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
 	return find_insn(file, func->cfunc->sec, func->cfunc->offset);
 }
 
+static struct instruction *prev_insn_same_sec(struct objtool_file *file,
+					      struct instruction *insn)
+{
+	if (insn->idx == 0) {
+		if (insn->prev_len)
+			return find_insn(file, insn->sec, insn->offset - insn->prev_len);
+		return NULL;
+	}
+
+	return insn - 1;
+}
+
 static struct instruction *prev_insn_same_sym(struct objtool_file *file,
-					       struct instruction *insn)
+					      struct instruction *insn)
 {
-	struct instruction *prev = list_prev_entry(insn, list);
+	struct instruction *prev = prev_insn_same_sec(file, insn);
 
-	if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn))
+	if (prev && insn_func(prev) == insn_func(insn))
 		return prev;
 
 	return NULL;
 }
 
+#define for_each_insn(file, insn)					\
+	for (struct section *__sec, *__fake = (struct section *)1;	\
+	     __fake; __fake = NULL)					\
+		for_each_sec(file, __sec)				\
+			sec_for_each_insn(file, __sec, insn)
+
 #define func_for_each_insn(file, func, insn)				\
 	for (insn = find_insn(file, func->sec, func->offset);		\
 	     insn;							\
@@ -96,16 +116,13 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 
 #define sym_for_each_insn(file, sym, insn)				\
 	for (insn = find_insn(file, sym->sec, sym->offset);		\
-	     insn && &insn->list != &file->insn_list &&			\
-		insn->sec == sym->sec &&				\
-		insn->offset < sym->offset + sym->len;			\
-	     insn = list_next_entry(insn, list))
+	     insn && insn->offset < sym->offset + sym->len;		\
+	     insn = next_insn_same_sec(file, insn))
 
 #define sym_for_each_insn_continue_reverse(file, sym, insn)		\
-	for (insn = list_prev_entry(insn, list);			\
-	     &insn->list != &file->insn_list &&				\
-		insn->sec == sym->sec && insn->offset >= sym->offset;	\
-	     insn = list_prev_entry(insn, list))
+	for (insn = prev_insn_same_sec(file, insn);			\
+	     insn && insn->offset >= sym->offset;			\
+	     insn = prev_insn_same_sec(file, insn))
 
 #define sec_for_each_insn_from(file, insn)				\
 	for (; insn; insn = next_insn_same_sec(file, insn))
@@ -384,6 +401,9 @@ static int decode_instructions(struct objtool_file *file)
 	int ret;
 
 	for_each_sec(file, sec) {
+		struct instruction *insns = NULL;
+		u8 prev_len = 0;
+		u8 idx = 0;
 
 		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
 			continue;
@@ -409,22 +429,31 @@ static int decode_instructions(struct objtool_file *file)
 			sec->init = true;
 
 		for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
-			insn = malloc(sizeof(*insn));
-			if (!insn) {
-				WARN("malloc failed");
-				return -1;
+			if (!insns || idx == INSN_CHUNK_MAX) {
+				insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
+				if (!insns) {
+					WARN("malloc failed");
+					return -1;
+				}
+				idx = 0;
+			} else {
+				idx++;
 			}
-			memset(insn, 0, sizeof(*insn));
-			INIT_LIST_HEAD(&insn->call_node);
+			insn = &insns[idx];
+			insn->idx = idx;
 
+			INIT_LIST_HEAD(&insn->call_node);
 			insn->sec = sec;
 			insn->offset = offset;
+			insn->prev_len = prev_len;
 
 			ret = arch_decode_instruction(file, sec, offset,
 						      sec->sh.sh_size - offset,
 						      insn);
 			if (ret)
-				goto err;
+				return ret;
+
+			prev_len = insn->len;
 
 			/*
 			 * By default, "ud2" is a dead end unless otherwise
@@ -435,10 +464,11 @@ static int decode_instructions(struct objtool_file *file)
 				insn->dead_end = true;
 
 			hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
-			list_add_tail(&insn->list, &file->insn_list);
 			nr_insns++;
 		}
 
+//		printf("%s: last chunk used: %d\n", sec->name, (int)idx);
+
 		list_for_each_entry(func, &sec->symbol_list, list) {
 			if (func->type != STT_NOTYPE && func->type != STT_FUNC)
 				continue;
@@ -481,10 +511,6 @@ static int decode_instructions(struct objtool_file *file)
 		printf("nr_insns: %lu\n", nr_insns);
 
 	return 0;
-
-err:
-	free(insn);
-	return ret;
 }
 
 /*
@@ -599,7 +625,7 @@ static int add_dead_ends(struct objtool_file *file)
 		}
 		insn = find_insn(file, reloc->sym->sec, reloc->addend);
 		if (insn)
-			insn = list_prev_entry(insn, list);
+			insn = prev_insn_same_sec(file, insn);
 		else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
 			insn = find_last_insn(file, reloc->sym->sec);
 			if (!insn) {
@@ -634,7 +660,7 @@ reachable:
 		}
 		insn = find_insn(file, reloc->sym->sec, reloc->addend);
 		if (insn)
-			insn = list_prev_entry(insn, list);
+			insn = prev_insn_same_sec(file, insn);
 		else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
 			insn = find_last_insn(file, reloc->sym->sec);
 			if (!insn) {
@@ -1775,6 +1801,7 @@ static int handle_group_alt(struct objtool_file *file,
 		orig_alt_group->orig_group = NULL;
 		orig_alt_group->first_insn = orig_insn;
 		orig_alt_group->last_insn = last_orig_insn;
+		orig_alt_group->nop = NULL;
 	} else {
 		if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
 		    orig_alt_group->first_insn->offset != special_alt->orig_len) {
@@ -1876,12 +1903,11 @@ static int handle_group_alt(struct objtool_file *file,
 		return -1;
 	}
 
-	if (nop)
-		list_add(&nop->list, &last_new_insn->list);
 end:
 	new_alt_group->orig_group = orig_alt_group;
 	new_alt_group->first_insn = *new_insn;
-	new_alt_group->last_insn = nop ? : last_new_insn;
+	new_alt_group->last_insn = last_new_insn;
+	new_alt_group->nop = nop;
 	new_alt_group->cfi = orig_alt_group->cfi;
 	return 0;
 }
@@ -1931,7 +1957,7 @@ static int handle_jump_alt(struct objtool_file *file,
 	else
 		file->jl_long++;
 
-	*new_insn = list_next_entry(orig_insn, list);
+	*new_insn = next_insn_same_sec(file, orig_insn);
 	return 0;
 }
 
@@ -3522,11 +3548,28 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file,
 	 * Simulate the fact that alternatives are patched in-place.  When the
 	 * end of a replacement alt_group is reached, redirect objtool flow to
 	 * the end of the original alt_group.
+	 *
+	 * insn->alts->insn -> alt_group->first_insn
+	 *		       ...
+	 *		       alt_group->last_insn
+	 *		       [alt_group->nop]      -> next(orig_group->last_insn)
 	 */
-	if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
-		return next_insn_same_sec(file, alt_group->orig_group->last_insn);
+	if (alt_group) {
+		if (alt_group->nop) {
+			/* ->nop implies ->orig_group */
+			if (insn == alt_group->last_insn)
+				return alt_group->nop;
+			if (insn == alt_group->nop)
+				goto next_orig;
+		}
+		if (insn == alt_group->last_insn && alt_group->orig_group)
+			goto next_orig;
+	}
 
 	return next_insn_same_sec(file, insn);
+
+next_orig:
+	return next_insn_same_sec(file, alt_group->orig_group->last_insn);
 }
 
 /*
@@ -3777,11 +3820,25 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 	return 0;
 }
 
+static int validate_unwind_hint(struct objtool_file *file,
+				  struct instruction *insn,
+				  struct insn_state *state)
+{
+	if (insn->hint && !insn->visited && !insn->ignore) {
+		int ret = validate_branch(file, insn_func(insn), insn, *state);
+		if (ret && opts.backtrace)
+			BT_FUNC("<=== (hint)", insn);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
 {
 	struct instruction *insn;
 	struct insn_state state;
-	int ret, warnings = 0;
+	int warnings = 0;
 
 	if (!file->hints)
 		return 0;
@@ -3789,22 +3846,11 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
 	init_insn_state(file, &state, sec);
 
 	if (sec) {
-		insn = find_insn(file, sec, 0);
-		if (!insn)
-			return 0;
+		sec_for_each_insn(file, sec, insn)
+			warnings += validate_unwind_hint(file, insn, &state);
 	} else {
-		insn = list_first_entry(&file->insn_list, typeof(*insn), list);
-	}
-
-	while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
-		if (insn->hint && !insn->visited && !insn->ignore) {
-			ret = validate_branch(file, insn_func(insn), insn, state);
-			if (ret && opts.backtrace)
-				BT_FUNC("<=== (hint)", insn);
-			warnings += ret;
-		}
-
-		insn = list_next_entry(insn, list);
+		for_each_insn(file, insn)
+			warnings += validate_unwind_hint(file, insn, &state);
 	}
 
 	return warnings;
@@ -4070,7 +4116,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	 *
 	 * It may also insert a UD2 after calling a __noreturn function.
 	 */
-	prev_insn = list_prev_entry(insn, list);
+	prev_insn = prev_insn_same_sec(file, insn);
 	if ((prev_insn->dead_end ||
 	     dead_end_function(file, insn_call_dest(prev_insn))) &&
 	    (insn->type == INSN_BUG ||
@@ -4102,7 +4148,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 		if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len)
 			break;
 
-		insn = list_next_entry(insn, list);
+		insn = next_insn_same_sec(file, insn);
 	}
 
 	return false;
@@ -4115,10 +4161,10 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func,
 		return 0;
 
 	for (;;) {
-		struct instruction *prev = list_prev_entry(insn, list);
+		struct instruction *prev = prev_insn_same_sec(file, insn);
 		u64 offset;
 
-		if (&prev->list == &file->insn_list)
+		if (!prev)
 			break;
 
 		if (prev->type != INSN_NOP)
@@ -4517,7 +4563,7 @@ int check(struct objtool_file *file)
 
 	warnings += ret;
 
-	if (list_empty(&file->insn_list))
+	if (!nr_insns)
 		goto out;
 
 	if (opts.retpoline) {
@@ -4626,7 +4672,7 @@ int check(struct objtool_file *file)
 		warnings += ret;
 	}
 
-	if (opts.orc && !list_empty(&file->insn_list)) {
+	if (opts.orc && nr_insns) {
 		ret = orc_create(file);
 		if (ret < 0)
 			goto out;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index ab6deae..3e7c700 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -27,7 +27,7 @@ struct alt_group {
 	struct alt_group *orig_group;
 
 	/* First and last instructions in the group */
-	struct instruction *first_insn, *last_insn;
+	struct instruction *first_insn, *last_insn, *nop;
 
 	/*
 	 * Byte-offset-addressed len-sized array of pointers to CFI structs.
@@ -36,31 +36,36 @@ struct alt_group {
 	struct cfi_state **cfi;
 };
 
+#define INSN_CHUNK_BITS		8
+#define INSN_CHUNK_SIZE		(1 << INSN_CHUNK_BITS)
+#define INSN_CHUNK_MAX		(INSN_CHUNK_SIZE - 1)
+
 struct instruction {
-	struct list_head list;
 	struct hlist_node hash;
 	struct list_head call_node;
 	struct section *sec;
 	unsigned long offset;
 	unsigned long immediate;
-	unsigned int len;
-	u8 type;
-
-	u16 dead_end		: 1,
-	   ignore		: 1,
-	   ignore_alts		: 1,
-	   hint			: 1,
-	   save			: 1,
-	   restore		: 1,
-	   retpoline_safe	: 1,
-	   noendbr		: 1,
-	   entry		: 1,
-	   visited		: 4,
-	   no_reloc		: 1;
-		/* 2 bit hole */
 
+	u8 len;
+	u8 prev_len;
+	u8 type;
 	s8 instr;
 
+	u32 idx			: INSN_CHUNK_BITS,
+	    dead_end		: 1,
+	    ignore		: 1,
+	    ignore_alts		: 1,
+	    hint		: 1,
+	    save		: 1,
+	    restore		: 1,
+	    retpoline_safe	: 1,
+	    noendbr		: 1,
+	    entry		: 1,
+	    visited		: 4,
+	    no_reloc		: 1;
+		/* 10 bit hole */
+
 	struct alt_group *alt_group;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
@@ -109,13 +114,11 @@ static inline bool is_jump(struct instruction *insn)
 struct instruction *find_insn(struct objtool_file *file,
 			      struct section *sec, unsigned long offset);
 
-#define for_each_insn(file, insn)					\
-	list_for_each_entry(insn, &file->insn_list, list)
+struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruction *insn);
 
-#define sec_for_each_insn(file, sec, insn)				\
-	for (insn = find_insn(file, sec, 0);				\
-	     insn && &insn->list != &file->insn_list &&			\
-			insn->sec == sec;				\
-	     insn = list_next_entry(insn, list))
+#define sec_for_each_insn(file, _sec, insn)				\
+	for (insn = find_insn(file, _sec, 0);				\
+	     insn && insn->sec == _sec;					\
+	     insn = next_insn_same_sec(file, insn))
 
 #endif /* _CHECK_H */
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index 6b40977..94a33ee 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -21,7 +21,6 @@ struct pv_state {
 
 struct objtool_file {
 	struct elf *elf;
-	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 20);
 	struct list_head retpoline_call_list;
 	struct list_head return_thunk_list;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 6affd80..c54f723 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -99,7 +99,6 @@ struct objtool_file *objtool_open_read(const char *_objname)
 		return NULL;
 	}
 
-	INIT_LIST_HEAD(&file.insn_list);
 	hash_init(file.insn_hash);
 	INIT_LIST_HEAD(&file.retpoline_call_list);
 	INIT_LIST_HEAD(&file.return_thunk_list);

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Fix overlapping alternatives
  2023-02-08 17:18 ` [PATCH 07/10] objtool: Fix overlapping alternatives Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     339b90b37d80ff2f4064bf072584031f3b1b5838
Gitweb:        https://git.kernel.org/tip/339b90b37d80ff2f4064bf072584031f3b1b5838
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:03 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:08 +01:00

objtool: Fix overlapping alternatives

Things like ALTERNATIVE_{2,3}() generate multiple alternatives on the
same place, objtool would override the first orig_alt_group with the
second (or third), failing to check the CFI among all the different
variants.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.711471461@infradead.org
---
 tools/objtool/check.c | 69 ++++++++++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 6f0adb2..7e9d3d3 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1744,36 +1744,49 @@ static int handle_group_alt(struct objtool_file *file,
 			    struct instruction *orig_insn,
 			    struct instruction **new_insn)
 {
-	struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+	struct instruction *last_new_insn = NULL, *insn, *nop = NULL;
 	struct alt_group *orig_alt_group, *new_alt_group;
 	unsigned long dest_off;
 
-
-	orig_alt_group = malloc(sizeof(*orig_alt_group));
+	orig_alt_group = orig_insn->alt_group;
 	if (!orig_alt_group) {
-		WARN("malloc failed");
-		return -1;
-	}
-	orig_alt_group->cfi = calloc(special_alt->orig_len,
-				     sizeof(struct cfi_state *));
-	if (!orig_alt_group->cfi) {
-		WARN("calloc failed");
-		return -1;
-	}
+		struct instruction *last_orig_insn = NULL;
 
-	last_orig_insn = NULL;
-	insn = orig_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
-			break;
+		orig_alt_group = malloc(sizeof(*orig_alt_group));
+		if (!orig_alt_group) {
+			WARN("malloc failed");
+			return -1;
+		}
+		orig_alt_group->cfi = calloc(special_alt->orig_len,
+					     sizeof(struct cfi_state *));
+		if (!orig_alt_group->cfi) {
+			WARN("calloc failed");
+			return -1;
+		}
 
-		insn->alt_group = orig_alt_group;
-		last_orig_insn = insn;
-	}
-	orig_alt_group->orig_group = NULL;
-	orig_alt_group->first_insn = orig_insn;
-	orig_alt_group->last_insn = last_orig_insn;
+		insn = orig_insn;
+		sec_for_each_insn_from(file, insn) {
+			if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+				break;
 
+			insn->alt_group = orig_alt_group;
+			last_orig_insn = insn;
+		}
+		orig_alt_group->orig_group = NULL;
+		orig_alt_group->first_insn = orig_insn;
+		orig_alt_group->last_insn = last_orig_insn;
+	} else {
+		if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
+		    orig_alt_group->first_insn->offset != special_alt->orig_len) {
+			WARN_FUNC("weirdly overlapping alternative! %ld != %d",
+				  orig_insn->sec, orig_insn->offset,
+				  orig_alt_group->last_insn->offset +
+				  orig_alt_group->last_insn->len -
+				  orig_alt_group->first_insn->offset,
+				  special_alt->orig_len);
+			return -1;
+		}
+	}
 
 	new_alt_group = malloc(sizeof(*new_alt_group));
 	if (!new_alt_group) {
@@ -1848,7 +1861,7 @@ static int handle_group_alt(struct objtool_file *file,
 
 		dest_off = arch_jump_destination(insn);
 		if (dest_off == special_alt->new_off + special_alt->new_len) {
-			insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
+			insn->jump_dest = next_insn_same_sec(file, orig_alt_group->last_insn);
 			if (!insn->jump_dest) {
 				WARN_FUNC("can't find alternative jump destination",
 					  insn->sec, insn->offset);
@@ -3226,8 +3239,12 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
 		alt_cfi[group_off] = insn->cfi;
 	} else {
 		if (cficmp(alt_cfi[group_off], insn->cfi)) {
-			WARN_FUNC("stack layout conflict in alternatives",
-				  insn->sec, insn->offset);
+			struct alt_group *orig_group = insn->alt_group->orig_group ?: insn->alt_group;
+			struct instruction *orig = orig_group->first_insn;
+			char *where = offstr(insn->sec, insn->offset);
+			WARN_FUNC("stack layout conflict in alternatives: %s",
+				  orig->sec, orig->offset, where);
+			free(where);
 			return -1;
 		}
 	}

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] x86: Fix FILL_RETURN_BUFFER
  2023-02-08 17:18 ` [PATCH 08/10] x86: Fix FILL_RETURN_BUFFER Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     2db38eca62cb9e893e7c0bfe0f6c5d327d990680
Gitweb:        https://git.kernel.org/tip/2db38eca62cb9e893e7c0bfe0f6c5d327d990680
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:04 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:09 +01:00

x86: Fix FILL_RETURN_BUFFER

With overlapping alternative validation fixed, objtool promptly
complains:

vmlinux.o: warning: objtool: __switch_to_asm+0x2c: stack layout conflict in alternatives: .altinstr_replacement+0x47

.rela.altinstructions:

000000000000009c  0000000200000002 R_X86_64_PC32          0000000000000000 .text + 16dc
00000000000000a0  0000000600000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + 3a
00000000000000a8  0000000200000002 R_X86_64_PC32          0000000000000000 .text + 16dc
00000000000000ac  0000000600000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + 66

.text:

00000000000016b0 <__switch_to_asm>:
    16b0:       f3 0f 1e fa             endbr64
    16b4:       55                      push   %rbp
    16b5:       53                      push   %rbx
    16b6:       41 54                   push   %r12
    16b8:       41 55                   push   %r13
    16ba:       41 56                   push   %r14
    16bc:       41 57                   push   %r15
    16be:       48 89 a7 18 0b 00 00    mov    %rsp,0xb18(%rdi)
    16c5:       48 8b a6 18 0b 00 00    mov    0xb18(%rsi),%rsp
    16cc:       48 8b 9e 28 05 00 00    mov    0x528(%rsi),%rbx
    16d3:       65 48 89 1c 25 00 00 00 00      mov    %rbx,%gs:0x0     16d8: R_X86_64_32S      fixed_percpu_data+0x28
    16dc:       eb 2a                   jmp    1708 <__switch_to_asm+0x58>
    16de:       90                      nop
    16df:       90                      nop
    16e0:       90                      nop
    16e1:       90                      nop
    16e2:       90                      nop
    16e3:       90                      nop
    16e4:       90                      nop
    16e5:       90                      nop
    16e6:       90                      nop
    16e7:       90                      nop
    16e8:       90                      nop
    16e9:       90                      nop
    16ea:       90                      nop
    16eb:       90                      nop
    16ec:       90                      nop
    16ed:       90                      nop
    16ee:       90                      nop
    16ef:       90                      nop
    16f0:       90                      nop
    16f1:       90                      nop
    16f2:       90                      nop
    16f3:       90                      nop
    16f4:       90                      nop
    16f5:       90                      nop
    16f6:       90                      nop
    16f7:       90                      nop
    16f8:       90                      nop
    16f9:       90                      nop
    16fa:       90                      nop
    16fb:       90                      nop
    16fc:       90                      nop
    16fd:       90                      nop
    16fe:       90                      nop
    16ff:       90                      nop
    1700:       90                      nop
    1701:       90                      nop
    1702:       90                      nop
    1703:       90                      nop
    1704:       90                      nop
    1705:       90                      nop
    1706:       90                      nop
    1707:       90                      nop
    1708:       41 5f                   pop    %r15
    170a:       41 5e                   pop    %r14
    170c:       41 5d                   pop    %r13
    170e:       41 5c                   pop    %r12
    1710:       5b                      pop    %rbx
    1711:       5d                      pop    %rbp
    1712:       e9 00 00 00 00          jmp    1717 <__switch_to_asm+0x67>      1713: R_X86_64_PLT32    __switch_to-0x4

.altinstr_replacement:

      3a:       49 c7 c4 10 00 00 00    mov    $0x10,%r12
      41:       e8 01 00 00 00          call   47 <.altinstr_replacement+0x47>
      46:       cc                      int3
      47:       e8 01 00 00 00          call   4d <.altinstr_replacement+0x4d>
      4c:       cc                      int3
      4d:       48 83 c4 10             add    $0x10,%rsp
      51:       49 ff cc                dec    %r12
      54:       75 eb                   jne    41 <.altinstr_replacement+0x41>
      56:       0f ae e8                lfence
      59:       65 48 c7 04 25 00 00 00 00 ff ff ff ff  movq   $0xffffffffffffffff,%gs:0x0      5e: R_X86_64_32S        pcpu_hot+0x10

      66:       e8 01 00 00 00          call   6c <.altinstr_replacement+0x6c>
      6b:       cc                      int3
      6c:       48 83 c4 08             add    $0x8,%rsp
      70:       0f ae e8                lfence

As can be seen from the two alternatives, when overlaid, the NOP after
the shorter (starting at 66) coinsides with the call at 47, leading to
conflicting CFI state for that instruction.

By offsetting the shorter alternative by 2 bytes, this alignment is
undone.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.783099843@infradead.org
---
 arch/x86/include/asm/nospec-branch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e04313e..3ef70e5 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -261,7 +261,7 @@
 .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
 	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
 		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
-		__stringify(__FILL_ONE_RETURN), \ftr2
+		__stringify(nop;nop;__FILL_ONE_RETURN), \ftr2
 
 .Lskip_rsb_\@:
 .endm

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Union instruction::{call_dest,jump_table}
  2023-02-08 17:18 ` [PATCH 06/10] objtool: Union instruction::{call_dest,jump_table} Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     2f586f32286e33ab97730696244737cec84c4bbe
Gitweb:        https://git.kernel.org/tip/2f586f32286e33ab97730696244737cec84c4bbe
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:02 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:08 +01:00

objtool: Union instruction::{call_dest,jump_table}

The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	long unsigned int          immediate;            /*    64     8 */
 	unsigned int               len;                  /*    72     4 */
 	u8                         type;                 /*    76     1 */

 	/* Bitfield combined with previous fields */

 	u16                        dead_end:1;           /*    76: 8  2 */
 	u16                        ignore:1;             /*    76: 9  2 */
 	u16                        ignore_alts:1;        /*    76:10  2 */
 	u16                        hint:1;               /*    76:11  2 */
 	u16                        save:1;               /*    76:12  2 */
 	u16                        restore:1;            /*    76:13  2 */
 	u16                        retpoline_safe:1;     /*    76:14  2 */
 	u16                        noendbr:1;            /*    76:15  2 */
 	u16                        entry:1;              /*    78: 0  2 */
 	u16                        visited:4;            /*    78: 1  2 */
 	u16                        no_reloc:1;           /*    78: 5  2 */

 	/* XXX 2 bits hole, try to pack */
 	/* Bitfield combined with next fields */

 	s8                         instr;                /*    79     1 */
 	struct alt_group *         alt_group;            /*    80     8 */
-	struct symbol *            call_dest;            /*    88     8 */
-	struct instruction *       jump_dest;            /*    96     8 */
-	struct instruction *       first_jump_src;       /*   104     8 */
-	struct reloc *             jump_table;           /*   112     8 */
-	struct alternative *       alts;                 /*   120     8 */
+	struct instruction *       jump_dest;            /*    88     8 */
+	struct instruction *       first_jump_src;       /*    96     8 */
+	union {
+		struct symbol *    _call_dest;           /*   104     8 */
+		struct reloc *     _jump_table;          /*   104     8 */
+	};                                               /*   104     8 */
+	struct alternative *       alts;                 /*   112     8 */
+	struct symbol *            sym;                  /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct symbol *            sym;                  /*   128     8 */
-	struct stack_op *          stack_ops;            /*   136     8 */
-	struct cfi_state *         cfi;                  /*   144     8 */
+	struct stack_op *          stack_ops;            /*   128     8 */
+	struct cfi_state *         cfi;                  /*   136     8 */

-	/* size: 152, cachelines: 3, members: 29 */
-	/* sum members: 150 */
+	/* size: 144, cachelines: 3, members: 28 */
+	/* sum members: 142 */
 	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
-	/* last cacheline: 24 bytes */
+	/* last cacheline: 16 bytes */
 };

pre:	5:39.35 real,   215.58 user,    123.69 sys,     23448736 mem
post:	5:38.18 real,   213.25 user,    124.90 sys,     23449040 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
---
 tools/objtool/check.c                 | 73 ++++++++++++++++----------
 tools/objtool/include/objtool/check.h |  6 +-
 2 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 6d0ce23..6f0adb2 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -114,16 +114,34 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 	for (insn = next_insn_same_sec(file, insn); insn;		\
 	     insn = next_insn_same_sec(file, insn))
 
+static inline struct symbol *insn_call_dest(struct instruction *insn)
+{
+	if (insn->type == INSN_JUMP_DYNAMIC ||
+	    insn->type == INSN_CALL_DYNAMIC)
+		return NULL;
+
+	return insn->_call_dest;
+}
+
+static inline struct reloc *insn_jump_table(struct instruction *insn)
+{
+	if (insn->type == INSN_JUMP_DYNAMIC ||
+	    insn->type == INSN_CALL_DYNAMIC)
+		return insn->_jump_table;
+
+	return NULL;
+}
+
 static bool is_jump_table_jump(struct instruction *insn)
 {
 	struct alt_group *alt_group = insn->alt_group;
 
-	if (insn->jump_table)
+	if (insn_jump_table(insn))
 		return true;
 
 	/* Retpoline alternative for a jump table? */
 	return alt_group && alt_group->orig_group &&
-	       alt_group->orig_group->first_insn->jump_table;
+	       insn_jump_table(alt_group->orig_group->first_insn);
 }
 
 static bool is_sibling_call(struct instruction *insn)
@@ -137,8 +155,8 @@ static bool is_sibling_call(struct instruction *insn)
 			return !is_jump_table_jump(insn);
 	}
 
-	/* add_jump_destinations() sets insn->call_dest for sibling calls. */
-	return (is_static_jump(insn) && insn->call_dest);
+	/* add_jump_destinations() sets insn_call_dest(insn) for sibling calls. */
+	return (is_static_jump(insn) && insn_call_dest(insn));
 }
 
 /*
@@ -274,8 +292,8 @@ static void init_insn_state(struct objtool_file *file, struct insn_state *state,
 
 	/*
 	 * We need the full vmlinux for noinstr validation, otherwise we can
-	 * not correctly determine insn->call_dest->sec (external symbols do
-	 * not have a section).
+	 * not correctly determine insn_call_dest(insn)->sec (external symbols
+	 * do not have a section).
 	 */
 	if (opts.link && opts.noinstr && sec)
 		state->noinstr = sec->noinstr;
@@ -678,7 +696,7 @@ static int create_static_call_sections(struct objtool_file *file)
 			return -1;
 
 		/* find key symbol */
-		key_name = strdup(insn->call_dest->name);
+		key_name = strdup(insn_call_dest(insn)->name);
 		if (!key_name) {
 			perror("strdup");
 			return -1;
@@ -709,7 +727,7 @@ static int create_static_call_sections(struct objtool_file *file)
 			 * trampoline address.  This is fixed up in
 			 * static_call_add_module().
 			 */
-			key_sym = insn->call_dest;
+			key_sym = insn_call_dest(insn);
 		}
 		free(key_name);
 
@@ -1340,7 +1358,7 @@ static void annotate_call_site(struct objtool_file *file,
 			       struct instruction *insn, bool sibling)
 {
 	struct reloc *reloc = insn_reloc(file, insn);
-	struct symbol *sym = insn->call_dest;
+	struct symbol *sym = insn_call_dest(insn);
 
 	if (!sym)
 		sym = reloc->sym;
@@ -1425,7 +1443,7 @@ static void annotate_call_site(struct objtool_file *file,
 static void add_call_dest(struct objtool_file *file, struct instruction *insn,
 			  struct symbol *dest, bool sibling)
 {
-	insn->call_dest = dest;
+	insn->_call_dest = dest;
 	if (!dest)
 		return;
 
@@ -1683,12 +1701,12 @@ static int add_call_destinations(struct objtool_file *file)
 			if (insn->ignore)
 				continue;
 
-			if (!insn->call_dest) {
+			if (!insn_call_dest(insn)) {
 				WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset);
 				return -1;
 			}
 
-			if (insn_func(insn) && insn->call_dest->type != STT_FUNC) {
+			if (insn_func(insn) && insn_call_dest(insn)->type != STT_FUNC) {
 				WARN_FUNC("unsupported call to non-function",
 					  insn->sec, insn->offset);
 				return -1;
@@ -2125,7 +2143,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
 		reloc = find_jump_table(file, func, insn);
 		if (reloc) {
 			reloc->jump_table_start = true;
-			insn->jump_table = reloc;
+			insn->_jump_table = reloc;
 		}
 	}
 }
@@ -2137,10 +2155,10 @@ static int add_func_jump_tables(struct objtool_file *file,
 	int ret;
 
 	func_for_each_insn(file, func, insn) {
-		if (!insn->jump_table)
+		if (!insn_jump_table(insn))
 			continue;
 
-		ret = add_jump_table(file, insn, insn->jump_table);
+		ret = add_jump_table(file, insn, insn_jump_table(insn));
 		if (ret)
 			return ret;
 	}
@@ -2612,8 +2630,8 @@ static int decode_sections(struct objtool_file *file)
 static bool is_fentry_call(struct instruction *insn)
 {
 	if (insn->type == INSN_CALL &&
-	    insn->call_dest &&
-	    insn->call_dest->fentry)
+	    insn_call_dest(insn) &&
+	    insn_call_dest(insn)->fentry)
 		return true;
 
 	return false;
@@ -3320,8 +3338,8 @@ static inline const char *call_dest_name(struct instruction *insn)
 	struct reloc *rel;
 	int idx;
 
-	if (insn->call_dest)
-		return insn->call_dest->name;
+	if (insn_call_dest(insn))
+		return insn_call_dest(insn)->name;
 
 	rel = insn_reloc(NULL, insn);
 	if (rel && !strcmp(rel->sym->name, "pv_ops")) {
@@ -3403,13 +3421,13 @@ static int validate_call(struct objtool_file *file,
 			 struct insn_state *state)
 {
 	if (state->noinstr && state->instr <= 0 &&
-	    !noinstr_call_dest(file, insn, insn->call_dest)) {
+	    !noinstr_call_dest(file, insn, insn_call_dest(insn))) {
 		WARN_FUNC("call to %s() leaves .noinstr.text section",
 				insn->sec, insn->offset, call_dest_name(insn));
 		return 1;
 	}
 
-	if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
+	if (state->uaccess && !func_uaccess_safe(insn_call_dest(insn))) {
 		WARN_FUNC("call to %s() with UACCESS enabled",
 				insn->sec, insn->offset, call_dest_name(insn));
 		return 1;
@@ -3847,11 +3865,11 @@ static int validate_entry(struct objtool_file *file, struct instruction *insn)
 
 			/* fallthrough */
 		case INSN_CALL:
-			dest = find_insn(file, insn->call_dest->sec,
-					 insn->call_dest->offset);
+			dest = find_insn(file, insn_call_dest(insn)->sec,
+					 insn_call_dest(insn)->offset);
 			if (!dest) {
 				WARN("Unresolved function after linking!?: %s",
-				     insn->call_dest->name);
+				     insn_call_dest(insn)->name);
 				return -1;
 			}
 
@@ -3952,13 +3970,13 @@ static int validate_retpoline(struct objtool_file *file)
 static bool is_kasan_insn(struct instruction *insn)
 {
 	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+		!strcmp(insn_call_dest(insn)->name, "__asan_handle_no_return"));
 }
 
 static bool is_ubsan_insn(struct instruction *insn)
 {
 	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name,
+		!strcmp(insn_call_dest(insn)->name,
 			"__ubsan_handle_builtin_unreachable"));
 }
 
@@ -4036,7 +4054,8 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	 * It may also insert a UD2 after calling a __noreturn function.
 	 */
 	prev_insn = list_prev_entry(insn, list);
-	if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) &&
+	if ((prev_insn->dead_end ||
+	     dead_end_function(file, insn_call_dest(prev_insn))) &&
 	    (insn->type == INSN_BUG ||
 	     (insn->type == INSN_JUMP_UNCONDITIONAL &&
 	      insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index fffc8b8..ab6deae 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -62,10 +62,12 @@ struct instruction {
 	s8 instr;
 
 	struct alt_group *alt_group;
-	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
-	struct reloc *jump_table;
+	union {
+		struct symbol *_call_dest;
+		struct reloc *_jump_table;
+	};
 	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Remove instruction::reloc
  2023-02-08 17:18 ` [PATCH 05/10] objtool: Remove instruction::reloc Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     bea0e38288dd36f81de4fd332086b99654d5f389
Gitweb:        https://git.kernel.org/tip/bea0e38288dd36f81de4fd332086b99654d5f389
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:01 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:08 +01:00

objtool: Remove instruction::reloc

Instead of caching the reloc for each instruction, only keep a
negative cache of not having a reloc (by far the most common case).

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	long unsigned int          immediate;            /*    64     8 */
 	unsigned int               len;                  /*    72     4 */
 	u8                         type;                 /*    76     1 */

 	/* Bitfield combined with previous fields */

 	u16                        dead_end:1;           /*    76: 8  2 */
 	u16                        ignore:1;             /*    76: 9  2 */
 	u16                        ignore_alts:1;        /*    76:10  2 */
 	u16                        hint:1;               /*    76:11  2 */
 	u16                        save:1;               /*    76:12  2 */
 	u16                        restore:1;            /*    76:13  2 */
 	u16                        retpoline_safe:1;     /*    76:14  2 */
 	u16                        noendbr:1;            /*    76:15  2 */
 	u16                        entry:1;              /*    78: 0  2 */
 	u16                        visited:4;            /*    78: 1  2 */
+	u16                        no_reloc:1;           /*    78: 5  2 */

-	/* XXX 3 bits hole, try to pack */
+	/* XXX 2 bits hole, try to pack */
 	/* Bitfield combined with next fields */

 	s8                         instr;                /*    79     1 */
 	struct alt_group *         alt_group;            /*    80     8 */
 	struct symbol *            call_dest;            /*    88     8 */
 	struct instruction *       jump_dest;            /*    96     8 */
 	struct instruction *       first_jump_src;       /*   104     8 */
 	struct reloc *             jump_table;           /*   112     8 */
-	struct reloc *             reloc;                /*   120     8 */
+	struct alternative *       alts;                 /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct alternative *       alts;                 /*   128     8 */
-	struct symbol *            sym;                  /*   136     8 */
-	struct stack_op *          stack_ops;            /*   144     8 */
-	struct cfi_state *         cfi;                  /*   152     8 */
+	struct symbol *            sym;                  /*   128     8 */
+	struct stack_op *          stack_ops;            /*   136     8 */
+	struct cfi_state *         cfi;                  /*   144     8 */

-	/* size: 160, cachelines: 3, members: 29 */
-	/* sum members: 158 */
-	/* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
-	/* last cacheline: 32 bytes */
+	/* size: 152, cachelines: 3, members: 29 */
+	/* sum members: 150 */
+	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
+	/* last cacheline: 24 bytes */
 };

pre:	5:48.89 real,   220.96 user,    127.55 sys,     24834672 mem
post:	5:39.35 real,   215.58 user,    123.69 sys,     23448736 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.572145269@infradead.org
---
 tools/objtool/check.c                 | 24 +++++++++++-------------
 tools/objtool/include/objtool/check.h |  6 +++---
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9f83e85..6d0ce23 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1305,26 +1305,24 @@ __weak bool arch_is_rethunk(struct symbol *sym)
 	return false;
 }
 
-#define NEGATIVE_RELOC	((void *)-1L)
-
 static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
 {
-	if (insn->reloc == NEGATIVE_RELOC)
+	struct reloc *reloc;
+
+	if (insn->no_reloc)
 		return NULL;
 
-	if (!insn->reloc) {
-		if (!file)
-			return NULL;
+	if (!file)
+		return NULL;
 
-		insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-						       insn->offset, insn->len);
-		if (!insn->reloc) {
-			insn->reloc = NEGATIVE_RELOC;
-			return NULL;
-		}
+	reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+					 insn->offset, insn->len);
+	if (!reloc) {
+		insn->no_reloc = 1;
+		return NULL;
 	}
 
-	return insn->reloc;
+	return reloc;
 }
 
 static void remove_insn_ops(struct instruction *insn)
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index a497ee7..fffc8b8 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -55,8 +55,9 @@ struct instruction {
 	   retpoline_safe	: 1,
 	   noendbr		: 1,
 	   entry		: 1,
-	   visited		: 4;
-		/* 3 bit hole */
+	   visited		: 4,
+	   no_reloc		: 1;
+		/* 2 bit hole */
 
 	s8 instr;
 
@@ -65,7 +66,6 @@ struct instruction {
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
 	struct reloc *jump_table;
-	struct reloc *reloc;
 	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Shrink instruction::{type,visited}
  2023-02-08 17:18 ` [PATCH 04/10] objtool: Shrink instruction::{type,visited} Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     627e45a4d618a958f8fd0de76d845b41e6d6b250
Gitweb:        https://git.kernel.org/tip/627e45a4d618a958f8fd0de76d845b41e6d6b250
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:00 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:07 +01:00

objtool: Shrink instruction::{type,visited}

Since we don't have that many types in enum insn_type, force it into a
u8 and re-arrange member to get rid of the holes, saves another 8
bytes.

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
-	unsigned int               len;                  /*    64     4 */
-	enum insn_type             type;                 /*    68     4 */
-	long unsigned int          immediate;            /*    72     8 */
-	u16                        dead_end:1;           /*    80: 0  2 */
-	u16                        ignore:1;             /*    80: 1  2 */
-	u16                        ignore_alts:1;        /*    80: 2  2 */
-	u16                        hint:1;               /*    80: 3  2 */
-	u16                        save:1;               /*    80: 4  2 */
-	u16                        restore:1;            /*    80: 5  2 */
-	u16                        retpoline_safe:1;     /*    80: 6  2 */
-	u16                        noendbr:1;            /*    80: 7  2 */
-	u16                        entry:1;              /*    80: 8  2 */
+	long unsigned int          immediate;            /*    64     8 */
+	unsigned int               len;                  /*    72     4 */
+	u8                         type;                 /*    76     1 */

-	/* XXX 7 bits hole, try to pack */
+	/* Bitfield combined with previous fields */

-	s8                         instr;                /*    82     1 */
-	u8                         visited;              /*    83     1 */
+	u16                        dead_end:1;           /*    76: 8  2 */
+	u16                        ignore:1;             /*    76: 9  2 */
+	u16                        ignore_alts:1;        /*    76:10  2 */
+	u16                        hint:1;               /*    76:11  2 */
+	u16                        save:1;               /*    76:12  2 */
+	u16                        restore:1;            /*    76:13  2 */
+	u16                        retpoline_safe:1;     /*    76:14  2 */
+	u16                        noendbr:1;            /*    76:15  2 */
+	u16                        entry:1;              /*    78: 0  2 */
+	u16                        visited:4;            /*    78: 1  2 */

-	/* XXX 4 bytes hole, try to pack */
+	/* XXX 3 bits hole, try to pack */
+	/* Bitfield combined with next fields */

-	struct alt_group *         alt_group;            /*    88     8 */
-	struct symbol *            call_dest;            /*    96     8 */
-	struct instruction *       jump_dest;            /*   104     8 */
-	struct instruction *       first_jump_src;       /*   112     8 */
-	struct reloc *             jump_table;           /*   120     8 */
+	s8                         instr;                /*    79     1 */
+	struct alt_group *         alt_group;            /*    80     8 */
+	struct symbol *            call_dest;            /*    88     8 */
+	struct instruction *       jump_dest;            /*    96     8 */
+	struct instruction *       first_jump_src;       /*   104     8 */
+	struct reloc *             jump_table;           /*   112     8 */
+	struct reloc *             reloc;                /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct reloc *             reloc;                /*   128     8 */
-	struct alternative *       alts;                 /*   136     8 */
-	struct symbol *            sym;                  /*   144     8 */
-	struct stack_op *          stack_ops;            /*   152     8 */
-	struct cfi_state *         cfi;                  /*   160     8 */
+	struct alternative *       alts;                 /*   128     8 */
+	struct symbol *            sym;                  /*   136     8 */
+	struct stack_op *          stack_ops;            /*   144     8 */
+	struct cfi_state *         cfi;                  /*   152     8 */

-	/* size: 168, cachelines: 3, members: 29 */
-	/* sum members: 162, holes: 1, sum holes: 4 */
-	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 40 bytes */
+	/* size: 160, cachelines: 3, members: 29 */
+	/* sum members: 158 */
+	/* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
+	/* last cacheline: 32 bytes */
 };

pre:	5:48.86 real,   220.30 user,    128.34 sys,     24834672 mem
post:	5:48.89 real,   220.96 user,    127.55 sys,     24834672 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.501847188@infradead.org
---
 tools/objtool/include/objtool/check.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 7966f60..a497ee7 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -42,9 +42,9 @@ struct instruction {
 	struct list_head call_node;
 	struct section *sec;
 	unsigned long offset;
-	unsigned int len;
-	enum insn_type type;
 	unsigned long immediate;
+	unsigned int len;
+	u8 type;
 
 	u16 dead_end		: 1,
 	   ignore		: 1,
@@ -54,11 +54,11 @@ struct instruction {
 	   restore		: 1,
 	   retpoline_safe	: 1,
 	   noendbr		: 1,
-	   entry		: 1;
-		/* 7 bit hole */
+	   entry		: 1,
+	   visited		: 4;
+		/* 3 bit hole */
 
 	s8 instr;
-	u8 visited;
 
 	struct alt_group *alt_group;
 	struct symbol *call_dest;

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Make instruction::alts a single-linked list
  2023-02-08 17:17 ` [PATCH 03/10] objtool: Make instruction::alts " Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     d15b41e98079755f21e49a2e60465ded7b910ba2
Gitweb:        https://git.kernel.org/tip/d15b41e98079755f21e49a2e60465ded7b910ba2
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:17:59 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:07 +01:00

objtool: Make instruction::alts a single-linked list

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	unsigned int               len;                  /*    64     4 */
 	enum insn_type             type;                 /*    68     4 */
 	long unsigned int          immediate;            /*    72     8 */
 	u16                        dead_end:1;           /*    80: 0  2 */
 	u16                        ignore:1;             /*    80: 1  2 */
 	u16                        ignore_alts:1;        /*    80: 2  2 */
 	u16                        hint:1;               /*    80: 3  2 */
 	u16                        save:1;               /*    80: 4  2 */
 	u16                        restore:1;            /*    80: 5  2 */
 	u16                        retpoline_safe:1;     /*    80: 6  2 */
 	u16                        noendbr:1;            /*    80: 7  2 */
 	u16                        entry:1;              /*    80: 8  2 */

 	/* XXX 7 bits hole, try to pack */

 	s8                         instr;                /*    82     1 */
 	u8                         visited;              /*    83     1 */

 	/* XXX 4 bytes hole, try to pack */

 	struct alt_group *         alt_group;            /*    88     8 */
 	struct symbol *            call_dest;            /*    96     8 */
 	struct instruction *       jump_dest;            /*   104     8 */
 	struct instruction *       first_jump_src;       /*   112     8 */
 	struct reloc *             jump_table;           /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
 	struct reloc *             reloc;                /*   128     8 */
-	struct list_head           alts;                 /*   136    16 */
-	struct symbol *            sym;                  /*   152     8 */
-	struct stack_op *          stack_ops;            /*   160     8 */
-	struct cfi_state *         cfi;                  /*   168     8 */
+	struct alternative *       alts;                 /*   136     8 */
+	struct symbol *            sym;                  /*   144     8 */
+	struct stack_op *          stack_ops;            /*   152     8 */
+	struct cfi_state *         cfi;                  /*   160     8 */

-	/* size: 176, cachelines: 3, members: 29 */
-	/* sum members: 170, holes: 1, sum holes: 4 */
+	/* size: 168, cachelines: 3, members: 29 */
+	/* sum members: 162, holes: 1, sum holes: 4 */
 	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 48 bytes */
+	/* last cacheline: 40 bytes */
 };

pre:	5:58.50 real,   229.64 user,    128.65 sys,     26221520 mem
post:	5:48.86 real,   220.30 user,    128.34 sys,     24834672 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.430556498@infradead.org
---
 tools/objtool/check.c                 | 18 +++++++++---------
 tools/objtool/include/objtool/check.h |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8109d74..9f83e85 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -23,7 +23,7 @@
 #include <linux/static_call_types.h>
 
 struct alternative {
-	struct list_head list;
+	struct alternative *next;
 	struct instruction *insn;
 	bool skip_orig;
 };
@@ -397,7 +397,6 @@ static int decode_instructions(struct objtool_file *file)
 				return -1;
 			}
 			memset(insn, 0, sizeof(*insn));
-			INIT_LIST_HEAD(&insn->alts);
 			INIT_LIST_HEAD(&insn->call_node);
 
 			insn->sec = sec;
@@ -1780,7 +1779,6 @@ static int handle_group_alt(struct objtool_file *file,
 			return -1;
 		}
 		memset(nop, 0, sizeof(*nop));
-		INIT_LIST_HEAD(&nop->alts);
 
 		nop->sec = special_alt->new_sec;
 		nop->offset = special_alt->new_off + special_alt->new_len;
@@ -1978,7 +1976,8 @@ static int add_special_section_alts(struct objtool_file *file)
 		alt->insn = new_insn;
 		alt->skip_orig = special_alt->skip_orig;
 		orig_insn->ignore_alts |= special_alt->skip_alt;
-		list_add_tail(&alt->list, &orig_insn->alts);
+		alt->next = orig_insn->alts;
+		orig_insn->alts = alt;
 
 		list_del(&special_alt->list);
 		free(special_alt);
@@ -2037,7 +2036,8 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 		}
 
 		alt->insn = dest_insn;
-		list_add_tail(&alt->list, &insn->alts);
+		alt->next = insn->alts;
+		insn->alts = alt;
 		prev_offset = reloc->offset;
 	}
 
@@ -3594,10 +3594,10 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 		if (propagate_alt_cfi(file, insn))
 			return 1;
 
-		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+		if (!insn->ignore_alts && insn->alts) {
 			bool skip_orig = false;
 
-			list_for_each_entry(alt, &insn->alts, list) {
+			for (alt = insn->alts; alt; alt = alt->next) {
 				if (alt->skip_orig)
 					skip_orig = true;
 
@@ -3796,11 +3796,11 @@ static int validate_entry(struct objtool_file *file, struct instruction *insn)
 
 		insn->visited |= VISITED_ENTRY;
 
-		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+		if (!insn->ignore_alts && insn->alts) {
 			struct alternative *alt;
 			bool skip_orig = false;
 
-			list_for_each_entry(alt, &insn->alts, list) {
+			for (alt = insn->alts; alt; alt = alt->next) {
 				if (alt->skip_orig)
 					skip_orig = true;
 
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 23e9819..7966f60 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -66,7 +66,7 @@ struct instruction {
 	struct instruction *first_jump_src;
 	struct reloc *jump_table;
 	struct reloc *reloc;
-	struct list_head alts;
+	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;
 	struct cfi_state *cfi;

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Make instruction::stack_ops a single-linked list
  2023-02-08 17:17 ` [PATCH 02/10] objtool: Make instruction::stack_ops a single-linked list Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     e4947e3df118ce83107e868357bf1ada0b4c7531
Gitweb:        https://git.kernel.org/tip/e4947e3df118ce83107e868357bf1ada0b4c7531
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:17:58 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:07 +01:00

objtool: Make instruction::stack_ops a single-linked list

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	unsigned int               len;                  /*    64     4 */
 	enum insn_type             type;                 /*    68     4 */
 	long unsigned int          immediate;            /*    72     8 */
 	u16                        dead_end:1;           /*    80: 0  2 */
 	u16                        ignore:1;             /*    80: 1  2 */
 	u16                        ignore_alts:1;        /*    80: 2  2 */
 	u16                        hint:1;               /*    80: 3  2 */
 	u16                        save:1;               /*    80: 4  2 */
 	u16                        restore:1;            /*    80: 5  2 */
 	u16                        retpoline_safe:1;     /*    80: 6  2 */
 	u16                        noendbr:1;            /*    80: 7  2 */
 	u16                        entry:1;              /*    80: 8  2 */

 	/* XXX 7 bits hole, try to pack */

 	s8                         instr;                /*    82     1 */
 	u8                         visited;              /*    83     1 */

 	/* XXX 4 bytes hole, try to pack */

 	struct alt_group *         alt_group;            /*    88     8 */
 	struct symbol *            call_dest;            /*    96     8 */
 	struct instruction *       jump_dest;            /*   104     8 */
 	struct instruction *       first_jump_src;       /*   112     8 */
 	struct reloc *             jump_table;           /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
 	struct reloc *             reloc;                /*   128     8 */
 	struct list_head           alts;                 /*   136    16 */
 	struct symbol *            sym;                  /*   152     8 */
-	struct list_head           stack_ops;            /*   160    16 */
-	struct cfi_state *         cfi;                  /*   176     8 */
+	struct stack_op *          stack_ops;            /*   160     8 */
+	struct cfi_state *         cfi;                  /*   168     8 */

-	/* size: 184, cachelines: 3, members: 29 */
-	/* sum members: 178, holes: 1, sum holes: 4 */
+	/* size: 176, cachelines: 3, members: 29 */
+	/* sum members: 170, holes: 1, sum holes: 4 */
 	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 56 bytes */
+	/* last cacheline: 48 bytes */
 };

pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
post:	5:58.50 real,   229.64 user,    128.65 sys,     26221520 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.362196959@infradead.org
---
 tools/objtool/arch/x86/decode.c       |  4 ++--
 tools/objtool/check.c                 | 11 +++++------
 tools/objtool/include/objtool/arch.h  |  2 +-
 tools/objtool/include/objtool/check.h |  2 +-
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index c5c4927..9ef024f 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -105,7 +105,7 @@ bool arch_pc_relative_reloc(struct reloc *reloc)
 #define ADD_OP(op) \
 	if (!(op = calloc(1, sizeof(*op)))) \
 		return -1; \
-	else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+	else for (*ops_list = op, ops_list = &op->next; op; op = NULL)
 
 /*
  * Helpers to decode ModRM/SIB:
@@ -148,7 +148,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			    unsigned long offset, unsigned int maxlen,
 			    struct instruction *insn)
 {
-	struct list_head *ops_list = &insn->stack_ops;
+	struct stack_op **ops_list = &insn->stack_ops;
 	const struct elf *elf = file->elf;
 	struct insn ins;
 	int x86_64, ret;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b3b423d..8109d74 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -398,7 +398,6 @@ static int decode_instructions(struct objtool_file *file)
 			}
 			memset(insn, 0, sizeof(*insn));
 			INIT_LIST_HEAD(&insn->alts);
-			INIT_LIST_HEAD(&insn->stack_ops);
 			INIT_LIST_HEAD(&insn->call_node);
 
 			insn->sec = sec;
@@ -1331,12 +1330,13 @@ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *i
 
 static void remove_insn_ops(struct instruction *insn)
 {
-	struct stack_op *op, *tmp;
+	struct stack_op *op, *next;
 
-	list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
-		list_del(&op->list);
+	for (op = insn->stack_ops; op; op = next) {
+		next = op->next;
 		free(op);
 	}
+	insn->stack_ops = NULL;
 }
 
 static void annotate_call_site(struct objtool_file *file,
@@ -1781,7 +1781,6 @@ static int handle_group_alt(struct objtool_file *file,
 		}
 		memset(nop, 0, sizeof(*nop));
 		INIT_LIST_HEAD(&nop->alts);
-		INIT_LIST_HEAD(&nop->stack_ops);
 
 		nop->sec = special_alt->new_sec;
 		nop->offset = special_alt->new_off + special_alt->new_len;
@@ -3226,7 +3225,7 @@ static int handle_insn_ops(struct instruction *insn,
 {
 	struct stack_op *op;
 
-	list_for_each_entry(op, &insn->stack_ops, list) {
+	for (op = insn->stack_ops; op; op = op->next) {
 
 		if (update_cfi_state(insn, next_insn, &state->cfi, op))
 			return 1;
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 73149f8..2b6d2ce 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -62,9 +62,9 @@ struct op_src {
 };
 
 struct stack_op {
+	struct stack_op *next;
 	struct op_dest dest;
 	struct op_src src;
-	struct list_head list;
 };
 
 struct instruction;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index acd7fae..23e9819 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -68,7 +68,7 @@ struct instruction {
 	struct reloc *reloc;
 	struct list_head alts;
 	struct symbol *sym;
-	struct list_head stack_ops;
+	struct stack_op *stack_ops;
 	struct cfi_state *cfi;
 };
 

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Change arch_decode_instruction() signature
  2023-02-08 17:17 ` [PATCH 01/10] objtool: Change arch_decode_instruction() signature Peter Zijlstra
@ 2023-02-13 11:10   ` tip-bot2 for Peter Zijlstra
  2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-13 11:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Josh Poimboeuf, Nathan Chancellor, linux, x86, linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     a6267fd5dda7e93abeb96277c63cae595eeab6ee
Gitweb:        https://git.kernel.org/tip/a6267fd5dda7e93abeb96277c63cae595eeab6ee
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:17:57 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Feb 2023 11:26:06 +01:00

objtool: Change arch_decode_instruction() signature

In preparation to changing struct instruction around a bit, avoid
passing it's members by pointer and instead pass the whole thing.

A cleanup in it's own right too.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.291087549@infradead.org
---
 tools/objtool/arch/powerpc/decode.c  |  22 ++---
 tools/objtool/arch/x86/decode.c      | 105 ++++++++++++--------------
 tools/objtool/check.c                |   4 +-
 tools/objtool/include/objtool/arch.h |   4 +-
 4 files changed, 64 insertions(+), 71 deletions(-)

diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index 9c65380..53b5569 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -41,38 +41,36 @@ const char *arch_ret_insn(int len)
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list)
+			    struct instruction *insn)
 {
 	unsigned int opcode;
 	enum insn_type typ;
 	unsigned long imm;
-	u32 insn;
+	u32 ins;
 
-	insn = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
-	opcode = insn >> 26;
+	ins = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
+	opcode = ins >> 26;
 	typ = INSN_OTHER;
 	imm = 0;
 
 	switch (opcode) {
 	case 18: /* b[l][a] */
-		if ((insn & 3) == 1) /* bl */
+		if ((ins & 3) == 1) /* bl */
 			typ = INSN_CALL;
 
-		imm = insn & 0x3fffffc;
+		imm = ins & 0x3fffffc;
 		if (imm & 0x2000000)
 			imm -= 0x4000000;
 		break;
 	}
 
 	if (opcode == 1)
-		*len = 8;
+		insn->len = 8;
 	else
-		*len = 4;
+		insn->len = 4;
 
-	*type = typ;
-	*immediate = imm;
+	insn->type = typ;
+	insn->immediate = imm;
 
 	return 0;
 }
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index e7b030f..c5c4927 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -146,12 +146,11 @@ static bool has_notrack_prefix(struct insn *insn)
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list)
+			    struct instruction *insn)
 {
+	struct list_head *ops_list = &insn->stack_ops;
 	const struct elf *elf = file->elf;
-	struct insn insn;
+	struct insn ins;
 	int x86_64, ret;
 	unsigned char op1, op2, op3, prefix,
 		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
@@ -165,42 +164,42 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	if (x86_64 == -1)
 		return -1;
 
-	ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen,
+	ret = insn_decode(&ins, sec->data->d_buf + offset, maxlen,
 			  x86_64 ? INSN_MODE_64 : INSN_MODE_32);
 	if (ret < 0) {
 		WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
 		return -1;
 	}
 
-	*len = insn.length;
-	*type = INSN_OTHER;
+	insn->len = ins.length;
+	insn->type = INSN_OTHER;
 
-	if (insn.vex_prefix.nbytes)
+	if (ins.vex_prefix.nbytes)
 		return 0;
 
-	prefix = insn.prefixes.bytes[0];
+	prefix = ins.prefixes.bytes[0];
 
-	op1 = insn.opcode.bytes[0];
-	op2 = insn.opcode.bytes[1];
-	op3 = insn.opcode.bytes[2];
+	op1 = ins.opcode.bytes[0];
+	op2 = ins.opcode.bytes[1];
+	op3 = ins.opcode.bytes[2];
 
-	if (insn.rex_prefix.nbytes) {
-		rex = insn.rex_prefix.bytes[0];
+	if (ins.rex_prefix.nbytes) {
+		rex = ins.rex_prefix.bytes[0];
 		rex_w = X86_REX_W(rex) >> 3;
 		rex_r = X86_REX_R(rex) >> 2;
 		rex_x = X86_REX_X(rex) >> 1;
 		rex_b = X86_REX_B(rex);
 	}
 
-	if (insn.modrm.nbytes) {
-		modrm = insn.modrm.bytes[0];
+	if (ins.modrm.nbytes) {
+		modrm = ins.modrm.bytes[0];
 		modrm_mod = X86_MODRM_MOD(modrm);
 		modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
 		modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
 	}
 
-	if (insn.sib.nbytes) {
-		sib = insn.sib.bytes[0];
+	if (ins.sib.nbytes) {
+		sib = ins.sib.bytes[0];
 		/* sib_scale = X86_SIB_SCALE(sib); */
 		sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
 		sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
@@ -254,7 +253,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 
 	case 0x70 ... 0x7f:
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0x80 ... 0x83:
@@ -278,7 +277,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		if (!rm_is_reg(CFI_SP))
 			break;
 
-		imm = insn.immediate.value;
+		imm = ins.immediate.value;
 		if (op1 & 2) { /* sign extend */
 			if (op1 & 1) { /* imm32 */
 				imm <<= 32;
@@ -309,7 +308,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			ADD_OP(op) {
 				op->src.type = OP_SRC_AND;
 				op->src.reg = CFI_SP;
-				op->src.offset = insn.immediate.value;
+				op->src.offset = ins.immediate.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = CFI_SP;
 			}
@@ -356,7 +355,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 					op->src.reg = CFI_SP;
 					op->dest.type = OP_DEST_REG_INDIRECT;
 					op->dest.reg = modrm_rm;
-					op->dest.offset = insn.displacement.value;
+					op->dest.offset = ins.displacement.value;
 				}
 				break;
 			}
@@ -389,7 +388,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG_INDIRECT;
 				op->dest.reg = CFI_BP;
-				op->dest.offset = insn.displacement.value;
+				op->dest.offset = ins.displacement.value;
 			}
 			break;
 		}
@@ -402,7 +401,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG_INDIRECT;
 				op->dest.reg = CFI_SP;
-				op->dest.offset = insn.displacement.value;
+				op->dest.offset = ins.displacement.value;
 			}
 			break;
 		}
@@ -419,7 +418,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_BP;
-				op->src.offset = insn.displacement.value;
+				op->src.offset = ins.displacement.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = modrm_reg;
 			}
@@ -432,7 +431,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_SP;
-				op->src.offset = insn.displacement.value;
+				op->src.offset = ins.displacement.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = modrm_reg;
 			}
@@ -464,7 +463,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 		/* lea disp(%src), %dst */
 		ADD_OP(op) {
-			op->src.offset = insn.displacement.value;
+			op->src.offset = ins.displacement.value;
 			if (!op->src.offset) {
 				/* lea (%src), %dst */
 				op->src.type = OP_SRC_REG;
@@ -487,7 +486,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 
 	case 0x90:
-		*type = INSN_NOP;
+		insn->type = INSN_NOP;
 		break;
 
 	case 0x9c:
@@ -511,39 +510,39 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		if (op2 == 0x01) {
 
 			if (modrm == 0xca)
-				*type = INSN_CLAC;
+				insn->type = INSN_CLAC;
 			else if (modrm == 0xcb)
-				*type = INSN_STAC;
+				insn->type = INSN_STAC;
 
 		} else if (op2 >= 0x80 && op2 <= 0x8f) {
 
-			*type = INSN_JUMP_CONDITIONAL;
+			insn->type = INSN_JUMP_CONDITIONAL;
 
 		} else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
 			   op2 == 0x35) {
 
 			/* sysenter, sysret */
-			*type = INSN_CONTEXT_SWITCH;
+			insn->type = INSN_CONTEXT_SWITCH;
 
 		} else if (op2 == 0x0b || op2 == 0xb9) {
 
 			/* ud2 */
-			*type = INSN_BUG;
+			insn->type = INSN_BUG;
 
 		} else if (op2 == 0x0d || op2 == 0x1f) {
 
 			/* nopl/nopw */
-			*type = INSN_NOP;
+			insn->type = INSN_NOP;
 
 		} else if (op2 == 0x1e) {
 
 			if (prefix == 0xf3 && (modrm == 0xfa || modrm == 0xfb))
-				*type = INSN_ENDBR;
+				insn->type = INSN_ENDBR;
 
 
 		} else if (op2 == 0x38 && op3 == 0xf8) {
-			if (insn.prefixes.nbytes == 1 &&
-			    insn.prefixes.bytes[0] == 0xf2) {
+			if (ins.prefixes.nbytes == 1 &&
+			    ins.prefixes.bytes[0] == 0xf2) {
 				/* ENQCMD cannot be used in the kernel. */
 				WARN("ENQCMD instruction at %s:%lx", sec->name,
 				     offset);
@@ -591,29 +590,29 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 	case 0xcc:
 		/* int3 */
-		*type = INSN_TRAP;
+		insn->type = INSN_TRAP;
 		break;
 
 	case 0xe3:
 		/* jecxz/jrcxz */
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0xe9:
 	case 0xeb:
-		*type = INSN_JUMP_UNCONDITIONAL;
+		insn->type = INSN_JUMP_UNCONDITIONAL;
 		break;
 
 	case 0xc2:
 	case 0xc3:
-		*type = INSN_RETURN;
+		insn->type = INSN_RETURN;
 		break;
 
 	case 0xc7: /* mov imm, r/m */
 		if (!opts.noinstr)
 			break;
 
-		if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
+		if (ins.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
 			struct reloc *immr, *disp;
 			struct symbol *func;
 			int idx;
@@ -661,17 +660,17 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 	case 0xca: /* retf */
 	case 0xcb: /* retf */
-		*type = INSN_CONTEXT_SWITCH;
+		insn->type = INSN_CONTEXT_SWITCH;
 		break;
 
 	case 0xe0: /* loopne */
 	case 0xe1: /* loope */
 	case 0xe2: /* loop */
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0xe8:
-		*type = INSN_CALL;
+		insn->type = INSN_CALL;
 		/*
 		 * For the impact on the stack, a CALL behaves like
 		 * a PUSH of an immediate value (the return address).
@@ -683,30 +682,30 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 
 	case 0xfc:
-		*type = INSN_CLD;
+		insn->type = INSN_CLD;
 		break;
 
 	case 0xfd:
-		*type = INSN_STD;
+		insn->type = INSN_STD;
 		break;
 
 	case 0xff:
 		if (modrm_reg == 2 || modrm_reg == 3) {
 
-			*type = INSN_CALL_DYNAMIC;
-			if (has_notrack_prefix(&insn))
+			insn->type = INSN_CALL_DYNAMIC;
+			if (has_notrack_prefix(&ins))
 				WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
 
 		} else if (modrm_reg == 4) {
 
-			*type = INSN_JUMP_DYNAMIC;
-			if (has_notrack_prefix(&insn))
+			insn->type = INSN_JUMP_DYNAMIC;
+			if (has_notrack_prefix(&ins))
 				WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
 
 		} else if (modrm_reg == 5) {
 
 			/* jmpf */
-			*type = INSN_CONTEXT_SWITCH;
+			insn->type = INSN_CONTEXT_SWITCH;
 
 		} else if (modrm_reg == 6) {
 
@@ -723,7 +722,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 	}
 
-	*immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
+	insn->immediate = ins.immediate.nbytes ? ins.immediate.value : 0;
 
 	return 0;
 }
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index ba07a8e..b3b423d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -406,9 +406,7 @@ static int decode_instructions(struct objtool_file *file)
 
 			ret = arch_decode_instruction(file, sec, offset,
 						      sec->sh.sh_size - offset,
-						      &insn->len, &insn->type,
-						      &insn->immediate,
-						      &insn->stack_ops);
+						      insn);
 			if (ret)
 				goto err;
 
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 4ecb480..73149f8 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -75,9 +75,7 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state);
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list);
+			    struct instruction *insn);
 
 bool arch_callee_saved_reg(unsigned char reg);
 

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] x86: Fix FILL_RETURN_BUFFER
  2023-02-08 17:18 ` [PATCH 08/10] x86: Fix FILL_RETURN_BUFFER Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     6ea17e848a8ba5138b30e936c4b71877bc972c13
Gitweb:        https://git.kernel.org/tip/6ea17e848a8ba5138b30e936c4b71877bc972c13
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:04 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:21:37 +01:00

x86: Fix FILL_RETURN_BUFFER

With overlapping alternative validation fixed, objtool promptly
complains:

vmlinux.o: warning: objtool: __switch_to_asm+0x2c: stack layout conflict in alternatives: .altinstr_replacement+0x47

.rela.altinstructions:

000000000000009c  0000000200000002 R_X86_64_PC32          0000000000000000 .text + 16dc
00000000000000a0  0000000600000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + 3a
00000000000000a8  0000000200000002 R_X86_64_PC32          0000000000000000 .text + 16dc
00000000000000ac  0000000600000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + 66

.text:

00000000000016b0 <__switch_to_asm>:
    16b0:       f3 0f 1e fa             endbr64
    16b4:       55                      push   %rbp
    16b5:       53                      push   %rbx
    16b6:       41 54                   push   %r12
    16b8:       41 55                   push   %r13
    16ba:       41 56                   push   %r14
    16bc:       41 57                   push   %r15
    16be:       48 89 a7 18 0b 00 00    mov    %rsp,0xb18(%rdi)
    16c5:       48 8b a6 18 0b 00 00    mov    0xb18(%rsi),%rsp
    16cc:       48 8b 9e 28 05 00 00    mov    0x528(%rsi),%rbx
    16d3:       65 48 89 1c 25 00 00 00 00      mov    %rbx,%gs:0x0     16d8: R_X86_64_32S      fixed_percpu_data+0x28
    16dc:       eb 2a                   jmp    1708 <__switch_to_asm+0x58>
    16de:       90                      nop
    16df:       90                      nop
    16e0:       90                      nop
    16e1:       90                      nop
    16e2:       90                      nop
    16e3:       90                      nop
    16e4:       90                      nop
    16e5:       90                      nop
    16e6:       90                      nop
    16e7:       90                      nop
    16e8:       90                      nop
    16e9:       90                      nop
    16ea:       90                      nop
    16eb:       90                      nop
    16ec:       90                      nop
    16ed:       90                      nop
    16ee:       90                      nop
    16ef:       90                      nop
    16f0:       90                      nop
    16f1:       90                      nop
    16f2:       90                      nop
    16f3:       90                      nop
    16f4:       90                      nop
    16f5:       90                      nop
    16f6:       90                      nop
    16f7:       90                      nop
    16f8:       90                      nop
    16f9:       90                      nop
    16fa:       90                      nop
    16fb:       90                      nop
    16fc:       90                      nop
    16fd:       90                      nop
    16fe:       90                      nop
    16ff:       90                      nop
    1700:       90                      nop
    1701:       90                      nop
    1702:       90                      nop
    1703:       90                      nop
    1704:       90                      nop
    1705:       90                      nop
    1706:       90                      nop
    1707:       90                      nop
    1708:       41 5f                   pop    %r15
    170a:       41 5e                   pop    %r14
    170c:       41 5d                   pop    %r13
    170e:       41 5c                   pop    %r12
    1710:       5b                      pop    %rbx
    1711:       5d                      pop    %rbp
    1712:       e9 00 00 00 00          jmp    1717 <__switch_to_asm+0x67>      1713: R_X86_64_PLT32    __switch_to-0x4

.altinstr_replacement:

      3a:       49 c7 c4 10 00 00 00    mov    $0x10,%r12
      41:       e8 01 00 00 00          call   47 <.altinstr_replacement+0x47>
      46:       cc                      int3
      47:       e8 01 00 00 00          call   4d <.altinstr_replacement+0x4d>
      4c:       cc                      int3
      4d:       48 83 c4 10             add    $0x10,%rsp
      51:       49 ff cc                dec    %r12
      54:       75 eb                   jne    41 <.altinstr_replacement+0x41>
      56:       0f ae e8                lfence
      59:       65 48 c7 04 25 00 00 00 00 ff ff ff ff  movq   $0xffffffffffffffff,%gs:0x0      5e: R_X86_64_32S        pcpu_hot+0x10

      66:       e8 01 00 00 00          call   6c <.altinstr_replacement+0x6c>
      6b:       cc                      int3
      6c:       48 83 c4 08             add    $0x8,%rsp
      70:       0f ae e8                lfence

As can be seen from the two alternatives, when overlaid, the NOP after
the shorter (starting at 66) coinsides with the call at 47, leading to
conflicting CFI state for that instruction.

By offsetting the shorter alternative by 2 bytes, this alignment is
undone.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.783099843@infradead.org
---
 arch/x86/include/asm/nospec-branch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e04313e..3ef70e5 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -261,7 +261,7 @@
 .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
 	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
 		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
-		__stringify(__FILL_ONE_RETURN), \ftr2
+		__stringify(nop;nop;__FILL_ONE_RETURN), \ftr2
 
 .Lskip_rsb_\@:
 .endm

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Fix overlapping alternatives
  2023-02-08 17:18 ` [PATCH 07/10] objtool: Fix overlapping alternatives Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     a706bb08c81ac878982e41d4b6abcc42258bd39e
Gitweb:        https://git.kernel.org/tip/a706bb08c81ac878982e41d4b6abcc42258bd39e
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:03 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:21:33 +01:00

objtool: Fix overlapping alternatives

Things like ALTERNATIVE_{2,3}() generate multiple alternatives on the
same place, objtool would override the first orig_alt_group with the
second (or third), failing to check the CFI among all the different
variants.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.711471461@infradead.org
---
 tools/objtool/check.c | 69 ++++++++++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 6f0adb2..7e9d3d3 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1744,36 +1744,49 @@ static int handle_group_alt(struct objtool_file *file,
 			    struct instruction *orig_insn,
 			    struct instruction **new_insn)
 {
-	struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+	struct instruction *last_new_insn = NULL, *insn, *nop = NULL;
 	struct alt_group *orig_alt_group, *new_alt_group;
 	unsigned long dest_off;
 
-
-	orig_alt_group = malloc(sizeof(*orig_alt_group));
+	orig_alt_group = orig_insn->alt_group;
 	if (!orig_alt_group) {
-		WARN("malloc failed");
-		return -1;
-	}
-	orig_alt_group->cfi = calloc(special_alt->orig_len,
-				     sizeof(struct cfi_state *));
-	if (!orig_alt_group->cfi) {
-		WARN("calloc failed");
-		return -1;
-	}
+		struct instruction *last_orig_insn = NULL;
 
-	last_orig_insn = NULL;
-	insn = orig_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
-			break;
+		orig_alt_group = malloc(sizeof(*orig_alt_group));
+		if (!orig_alt_group) {
+			WARN("malloc failed");
+			return -1;
+		}
+		orig_alt_group->cfi = calloc(special_alt->orig_len,
+					     sizeof(struct cfi_state *));
+		if (!orig_alt_group->cfi) {
+			WARN("calloc failed");
+			return -1;
+		}
 
-		insn->alt_group = orig_alt_group;
-		last_orig_insn = insn;
-	}
-	orig_alt_group->orig_group = NULL;
-	orig_alt_group->first_insn = orig_insn;
-	orig_alt_group->last_insn = last_orig_insn;
+		insn = orig_insn;
+		sec_for_each_insn_from(file, insn) {
+			if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+				break;
 
+			insn->alt_group = orig_alt_group;
+			last_orig_insn = insn;
+		}
+		orig_alt_group->orig_group = NULL;
+		orig_alt_group->first_insn = orig_insn;
+		orig_alt_group->last_insn = last_orig_insn;
+	} else {
+		if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
+		    orig_alt_group->first_insn->offset != special_alt->orig_len) {
+			WARN_FUNC("weirdly overlapping alternative! %ld != %d",
+				  orig_insn->sec, orig_insn->offset,
+				  orig_alt_group->last_insn->offset +
+				  orig_alt_group->last_insn->len -
+				  orig_alt_group->first_insn->offset,
+				  special_alt->orig_len);
+			return -1;
+		}
+	}
 
 	new_alt_group = malloc(sizeof(*new_alt_group));
 	if (!new_alt_group) {
@@ -1848,7 +1861,7 @@ static int handle_group_alt(struct objtool_file *file,
 
 		dest_off = arch_jump_destination(insn);
 		if (dest_off == special_alt->new_off + special_alt->new_len) {
-			insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
+			insn->jump_dest = next_insn_same_sec(file, orig_alt_group->last_insn);
 			if (!insn->jump_dest) {
 				WARN_FUNC("can't find alternative jump destination",
 					  insn->sec, insn->offset);
@@ -3226,8 +3239,12 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
 		alt_cfi[group_off] = insn->cfi;
 	} else {
 		if (cficmp(alt_cfi[group_off], insn->cfi)) {
-			WARN_FUNC("stack layout conflict in alternatives",
-				  insn->sec, insn->offset);
+			struct alt_group *orig_group = insn->alt_group->orig_group ?: insn->alt_group;
+			struct instruction *orig = orig_group->first_insn;
+			char *where = offstr(insn->sec, insn->offset);
+			WARN_FUNC("stack layout conflict in alternatives: %s",
+				  orig->sec, orig->offset, where);
+			free(where);
 			return -1;
 		}
 	}

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Union instruction::{call_dest,jump_table}
  2023-02-08 17:18 ` [PATCH 06/10] objtool: Union instruction::{call_dest,jump_table} Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     c6f5dc28fb3d736fa8d7f7d31e0664a9c772c299
Gitweb:        https://git.kernel.org/tip/c6f5dc28fb3d736fa8d7f7d31e0664a9c772c299
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:02 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:21:27 +01:00

objtool: Union instruction::{call_dest,jump_table}

The instruction call_dest and jump_table members can never be used at
the same time, their usage depends on type.

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	long unsigned int          immediate;            /*    64     8 */
 	unsigned int               len;                  /*    72     4 */
 	u8                         type;                 /*    76     1 */

 	/* Bitfield combined with previous fields */

 	u16                        dead_end:1;           /*    76: 8  2 */
 	u16                        ignore:1;             /*    76: 9  2 */
 	u16                        ignore_alts:1;        /*    76:10  2 */
 	u16                        hint:1;               /*    76:11  2 */
 	u16                        save:1;               /*    76:12  2 */
 	u16                        restore:1;            /*    76:13  2 */
 	u16                        retpoline_safe:1;     /*    76:14  2 */
 	u16                        noendbr:1;            /*    76:15  2 */
 	u16                        entry:1;              /*    78: 0  2 */
 	u16                        visited:4;            /*    78: 1  2 */
 	u16                        no_reloc:1;           /*    78: 5  2 */

 	/* XXX 2 bits hole, try to pack */
 	/* Bitfield combined with next fields */

 	s8                         instr;                /*    79     1 */
 	struct alt_group *         alt_group;            /*    80     8 */
-	struct symbol *            call_dest;            /*    88     8 */
-	struct instruction *       jump_dest;            /*    96     8 */
-	struct instruction *       first_jump_src;       /*   104     8 */
-	struct reloc *             jump_table;           /*   112     8 */
-	struct alternative *       alts;                 /*   120     8 */
+	struct instruction *       jump_dest;            /*    88     8 */
+	struct instruction *       first_jump_src;       /*    96     8 */
+	union {
+		struct symbol *    _call_dest;           /*   104     8 */
+		struct reloc *     _jump_table;          /*   104     8 */
+	};                                               /*   104     8 */
+	struct alternative *       alts;                 /*   112     8 */
+	struct symbol *            sym;                  /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct symbol *            sym;                  /*   128     8 */
-	struct stack_op *          stack_ops;            /*   136     8 */
-	struct cfi_state *         cfi;                  /*   144     8 */
+	struct stack_op *          stack_ops;            /*   128     8 */
+	struct cfi_state *         cfi;                  /*   136     8 */

-	/* size: 152, cachelines: 3, members: 29 */
-	/* sum members: 150 */
+	/* size: 144, cachelines: 3, members: 28 */
+	/* sum members: 142 */
 	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
-	/* last cacheline: 24 bytes */
+	/* last cacheline: 16 bytes */
 };

pre:	5:39.35 real,   215.58 user,    123.69 sys,     23448736 mem
post:	5:38.18 real,   213.25 user,    124.90 sys,     23449040 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.640914454@infradead.org
---
 tools/objtool/check.c                 | 73 ++++++++++++++++----------
 tools/objtool/include/objtool/check.h |  6 +-
 2 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 6d0ce23..6f0adb2 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -114,16 +114,34 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 	for (insn = next_insn_same_sec(file, insn); insn;		\
 	     insn = next_insn_same_sec(file, insn))
 
+static inline struct symbol *insn_call_dest(struct instruction *insn)
+{
+	if (insn->type == INSN_JUMP_DYNAMIC ||
+	    insn->type == INSN_CALL_DYNAMIC)
+		return NULL;
+
+	return insn->_call_dest;
+}
+
+static inline struct reloc *insn_jump_table(struct instruction *insn)
+{
+	if (insn->type == INSN_JUMP_DYNAMIC ||
+	    insn->type == INSN_CALL_DYNAMIC)
+		return insn->_jump_table;
+
+	return NULL;
+}
+
 static bool is_jump_table_jump(struct instruction *insn)
 {
 	struct alt_group *alt_group = insn->alt_group;
 
-	if (insn->jump_table)
+	if (insn_jump_table(insn))
 		return true;
 
 	/* Retpoline alternative for a jump table? */
 	return alt_group && alt_group->orig_group &&
-	       alt_group->orig_group->first_insn->jump_table;
+	       insn_jump_table(alt_group->orig_group->first_insn);
 }
 
 static bool is_sibling_call(struct instruction *insn)
@@ -137,8 +155,8 @@ static bool is_sibling_call(struct instruction *insn)
 			return !is_jump_table_jump(insn);
 	}
 
-	/* add_jump_destinations() sets insn->call_dest for sibling calls. */
-	return (is_static_jump(insn) && insn->call_dest);
+	/* add_jump_destinations() sets insn_call_dest(insn) for sibling calls. */
+	return (is_static_jump(insn) && insn_call_dest(insn));
 }
 
 /*
@@ -274,8 +292,8 @@ static void init_insn_state(struct objtool_file *file, struct insn_state *state,
 
 	/*
 	 * We need the full vmlinux for noinstr validation, otherwise we can
-	 * not correctly determine insn->call_dest->sec (external symbols do
-	 * not have a section).
+	 * not correctly determine insn_call_dest(insn)->sec (external symbols
+	 * do not have a section).
 	 */
 	if (opts.link && opts.noinstr && sec)
 		state->noinstr = sec->noinstr;
@@ -678,7 +696,7 @@ static int create_static_call_sections(struct objtool_file *file)
 			return -1;
 
 		/* find key symbol */
-		key_name = strdup(insn->call_dest->name);
+		key_name = strdup(insn_call_dest(insn)->name);
 		if (!key_name) {
 			perror("strdup");
 			return -1;
@@ -709,7 +727,7 @@ static int create_static_call_sections(struct objtool_file *file)
 			 * trampoline address.  This is fixed up in
 			 * static_call_add_module().
 			 */
-			key_sym = insn->call_dest;
+			key_sym = insn_call_dest(insn);
 		}
 		free(key_name);
 
@@ -1340,7 +1358,7 @@ static void annotate_call_site(struct objtool_file *file,
 			       struct instruction *insn, bool sibling)
 {
 	struct reloc *reloc = insn_reloc(file, insn);
-	struct symbol *sym = insn->call_dest;
+	struct symbol *sym = insn_call_dest(insn);
 
 	if (!sym)
 		sym = reloc->sym;
@@ -1425,7 +1443,7 @@ static void annotate_call_site(struct objtool_file *file,
 static void add_call_dest(struct objtool_file *file, struct instruction *insn,
 			  struct symbol *dest, bool sibling)
 {
-	insn->call_dest = dest;
+	insn->_call_dest = dest;
 	if (!dest)
 		return;
 
@@ -1683,12 +1701,12 @@ static int add_call_destinations(struct objtool_file *file)
 			if (insn->ignore)
 				continue;
 
-			if (!insn->call_dest) {
+			if (!insn_call_dest(insn)) {
 				WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset);
 				return -1;
 			}
 
-			if (insn_func(insn) && insn->call_dest->type != STT_FUNC) {
+			if (insn_func(insn) && insn_call_dest(insn)->type != STT_FUNC) {
 				WARN_FUNC("unsupported call to non-function",
 					  insn->sec, insn->offset);
 				return -1;
@@ -2125,7 +2143,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
 		reloc = find_jump_table(file, func, insn);
 		if (reloc) {
 			reloc->jump_table_start = true;
-			insn->jump_table = reloc;
+			insn->_jump_table = reloc;
 		}
 	}
 }
@@ -2137,10 +2155,10 @@ static int add_func_jump_tables(struct objtool_file *file,
 	int ret;
 
 	func_for_each_insn(file, func, insn) {
-		if (!insn->jump_table)
+		if (!insn_jump_table(insn))
 			continue;
 
-		ret = add_jump_table(file, insn, insn->jump_table);
+		ret = add_jump_table(file, insn, insn_jump_table(insn));
 		if (ret)
 			return ret;
 	}
@@ -2612,8 +2630,8 @@ static int decode_sections(struct objtool_file *file)
 static bool is_fentry_call(struct instruction *insn)
 {
 	if (insn->type == INSN_CALL &&
-	    insn->call_dest &&
-	    insn->call_dest->fentry)
+	    insn_call_dest(insn) &&
+	    insn_call_dest(insn)->fentry)
 		return true;
 
 	return false;
@@ -3320,8 +3338,8 @@ static inline const char *call_dest_name(struct instruction *insn)
 	struct reloc *rel;
 	int idx;
 
-	if (insn->call_dest)
-		return insn->call_dest->name;
+	if (insn_call_dest(insn))
+		return insn_call_dest(insn)->name;
 
 	rel = insn_reloc(NULL, insn);
 	if (rel && !strcmp(rel->sym->name, "pv_ops")) {
@@ -3403,13 +3421,13 @@ static int validate_call(struct objtool_file *file,
 			 struct insn_state *state)
 {
 	if (state->noinstr && state->instr <= 0 &&
-	    !noinstr_call_dest(file, insn, insn->call_dest)) {
+	    !noinstr_call_dest(file, insn, insn_call_dest(insn))) {
 		WARN_FUNC("call to %s() leaves .noinstr.text section",
 				insn->sec, insn->offset, call_dest_name(insn));
 		return 1;
 	}
 
-	if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
+	if (state->uaccess && !func_uaccess_safe(insn_call_dest(insn))) {
 		WARN_FUNC("call to %s() with UACCESS enabled",
 				insn->sec, insn->offset, call_dest_name(insn));
 		return 1;
@@ -3847,11 +3865,11 @@ static int validate_entry(struct objtool_file *file, struct instruction *insn)
 
 			/* fallthrough */
 		case INSN_CALL:
-			dest = find_insn(file, insn->call_dest->sec,
-					 insn->call_dest->offset);
+			dest = find_insn(file, insn_call_dest(insn)->sec,
+					 insn_call_dest(insn)->offset);
 			if (!dest) {
 				WARN("Unresolved function after linking!?: %s",
-				     insn->call_dest->name);
+				     insn_call_dest(insn)->name);
 				return -1;
 			}
 
@@ -3952,13 +3970,13 @@ static int validate_retpoline(struct objtool_file *file)
 static bool is_kasan_insn(struct instruction *insn)
 {
 	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+		!strcmp(insn_call_dest(insn)->name, "__asan_handle_no_return"));
 }
 
 static bool is_ubsan_insn(struct instruction *insn)
 {
 	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name,
+		!strcmp(insn_call_dest(insn)->name,
 			"__ubsan_handle_builtin_unreachable"));
 }
 
@@ -4036,7 +4054,8 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	 * It may also insert a UD2 after calling a __noreturn function.
 	 */
 	prev_insn = list_prev_entry(insn, list);
-	if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) &&
+	if ((prev_insn->dead_end ||
+	     dead_end_function(file, insn_call_dest(prev_insn))) &&
 	    (insn->type == INSN_BUG ||
 	     (insn->type == INSN_JUMP_UNCONDITIONAL &&
 	      insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index fffc8b8..ab6deae 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -62,10 +62,12 @@ struct instruction {
 	s8 instr;
 
 	struct alt_group *alt_group;
-	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
-	struct reloc *jump_table;
+	union {
+		struct symbol *_call_dest;
+		struct reloc *_jump_table;
+	};
 	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Remove instruction::list
  2023-02-08 17:18 ` [PATCH 09/10] objtool: Remove instruction::list Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     1c34496e5856886d565665fb64029ecdeb080ffb
Gitweb:        https://git.kernel.org/tip/1c34496e5856886d565665fb64029ecdeb080ffb
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:05 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:21:44 +01:00

objtool: Remove instruction::list

Replace the instruction::list by allocating instructions in arrays of
256 entries and stringing them together by (amortized) find_insn().
This shrinks instruction by 16 bytes and brings it down to 128.

 struct instruction {
-	struct list_head           list;                 /*     0    16 */
-	struct hlist_node          hash;                 /*    16    16 */
-	struct list_head           call_node;            /*    32    16 */
-	struct section *           sec;                  /*    48     8 */
-	long unsigned int          offset;               /*    56     8 */
-	/* --- cacheline 1 boundary (64 bytes) --- */
-	long unsigned int          immediate;            /*    64     8 */
-	unsigned int               len;                  /*    72     4 */
-	u8                         type;                 /*    76     1 */
-
-	/* Bitfield combined with previous fields */
+	struct hlist_node          hash;                 /*     0    16 */
+	struct list_head           call_node;            /*    16    16 */
+	struct section *           sec;                  /*    32     8 */
+	long unsigned int          offset;               /*    40     8 */
+	long unsigned int          immediate;            /*    48     8 */
+	u8                         len;                  /*    56     1 */
+	u8                         prev_len;             /*    57     1 */
+	u8                         type;                 /*    58     1 */
+	s8                         instr;                /*    59     1 */
+	u32                        idx:8;                /*    60: 0  4 */
+	u32                        dead_end:1;           /*    60: 8  4 */
+	u32                        ignore:1;             /*    60: 9  4 */
+	u32                        ignore_alts:1;        /*    60:10  4 */
+	u32                        hint:1;               /*    60:11  4 */
+	u32                        save:1;               /*    60:12  4 */
+	u32                        restore:1;            /*    60:13  4 */
+	u32                        retpoline_safe:1;     /*    60:14  4 */
+	u32                        noendbr:1;            /*    60:15  4 */
+	u32                        entry:1;              /*    60:16  4 */
+	u32                        visited:4;            /*    60:17  4 */
+	u32                        no_reloc:1;           /*    60:21  4 */

-	u16                        dead_end:1;           /*    76: 8  2 */
-	u16                        ignore:1;             /*    76: 9  2 */
-	u16                        ignore_alts:1;        /*    76:10  2 */
-	u16                        hint:1;               /*    76:11  2 */
-	u16                        save:1;               /*    76:12  2 */
-	u16                        restore:1;            /*    76:13  2 */
-	u16                        retpoline_safe:1;     /*    76:14  2 */
-	u16                        noendbr:1;            /*    76:15  2 */
-	u16                        entry:1;              /*    78: 0  2 */
-	u16                        visited:4;            /*    78: 1  2 */
-	u16                        no_reloc:1;           /*    78: 5  2 */
+	/* XXX 10 bits hole, try to pack */

-	/* XXX 2 bits hole, try to pack */
-	/* Bitfield combined with next fields */
-
-	s8                         instr;                /*    79     1 */
-	struct alt_group *         alt_group;            /*    80     8 */
-	struct instruction *       jump_dest;            /*    88     8 */
-	struct instruction *       first_jump_src;       /*    96     8 */
+	/* --- cacheline 1 boundary (64 bytes) --- */
+	struct alt_group *         alt_group;            /*    64     8 */
+	struct instruction *       jump_dest;            /*    72     8 */
+	struct instruction *       first_jump_src;       /*    80     8 */
 	union {
-		struct symbol *    _call_dest;           /*   104     8 */
-		struct reloc *     _jump_table;          /*   104     8 */
-	};                                               /*   104     8 */
-	struct alternative *       alts;                 /*   112     8 */
-	struct symbol *            sym;                  /*   120     8 */
-	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct stack_op *          stack_ops;            /*   128     8 */
-	struct cfi_state *         cfi;                  /*   136     8 */
+		struct symbol *    _call_dest;           /*    88     8 */
+		struct reloc *     _jump_table;          /*    88     8 */
+	};                                               /*    88     8 */
+	struct alternative *       alts;                 /*    96     8 */
+	struct symbol *            sym;                  /*   104     8 */
+	struct stack_op *          stack_ops;            /*   112     8 */
+	struct cfi_state *         cfi;                  /*   120     8 */

-	/* size: 144, cachelines: 3, members: 28 */
-	/* sum members: 142 */
-	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
-	/* last cacheline: 16 bytes */
+	/* size: 128, cachelines: 2, members: 29 */
+	/* sum members: 124 */
+	/* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
 };

pre:	5:38.18 real,   213.25 user,    124.90 sys,     23449040 mem
post:	5:03.34 real,   210.75 user,    88.80 sys,      20241232 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org
---
 tools/objtool/check.c                   | 166 ++++++++++++++---------
 tools/objtool/include/objtool/check.h   |  51 +++----
 tools/objtool/include/objtool/objtool.h |   1 +-
 tools/objtool/objtool.c                 |   1 +-
 4 files changed, 133 insertions(+), 86 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 7e9d3d3..b0b467d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -47,27 +47,29 @@ struct instruction *find_insn(struct objtool_file *file,
 	return NULL;
 }
 
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
-					      struct instruction *insn)
+struct instruction *next_insn_same_sec(struct objtool_file *file,
+				       struct instruction *insn)
 {
-	struct instruction *next = list_next_entry(insn, list);
+	if (insn->idx == INSN_CHUNK_MAX)
+		return find_insn(file, insn->sec, insn->offset + insn->len);
 
-	if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+	insn++;
+	if (!insn->len)
 		return NULL;
 
-	return next;
+	return insn;
 }
 
 static struct instruction *next_insn_same_func(struct objtool_file *file,
 					       struct instruction *insn)
 {
-	struct instruction *next = list_next_entry(insn, list);
+	struct instruction *next = next_insn_same_sec(file, insn);
 	struct symbol *func = insn_func(insn);
 
 	if (!func)
 		return NULL;
 
-	if (&next->list != &file->insn_list && insn_func(next) == func)
+	if (next && insn_func(next) == func)
 		return next;
 
 	/* Check if we're already in the subfunction: */
@@ -78,17 +80,35 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
 	return find_insn(file, func->cfunc->sec, func->cfunc->offset);
 }
 
+static struct instruction *prev_insn_same_sec(struct objtool_file *file,
+					      struct instruction *insn)
+{
+	if (insn->idx == 0) {
+		if (insn->prev_len)
+			return find_insn(file, insn->sec, insn->offset - insn->prev_len);
+		return NULL;
+	}
+
+	return insn - 1;
+}
+
 static struct instruction *prev_insn_same_sym(struct objtool_file *file,
-					       struct instruction *insn)
+					      struct instruction *insn)
 {
-	struct instruction *prev = list_prev_entry(insn, list);
+	struct instruction *prev = prev_insn_same_sec(file, insn);
 
-	if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn))
+	if (prev && insn_func(prev) == insn_func(insn))
 		return prev;
 
 	return NULL;
 }
 
+#define for_each_insn(file, insn)					\
+	for (struct section *__sec, *__fake = (struct section *)1;	\
+	     __fake; __fake = NULL)					\
+		for_each_sec(file, __sec)				\
+			sec_for_each_insn(file, __sec, insn)
+
 #define func_for_each_insn(file, func, insn)				\
 	for (insn = find_insn(file, func->sec, func->offset);		\
 	     insn;							\
@@ -96,16 +116,13 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 
 #define sym_for_each_insn(file, sym, insn)				\
 	for (insn = find_insn(file, sym->sec, sym->offset);		\
-	     insn && &insn->list != &file->insn_list &&			\
-		insn->sec == sym->sec &&				\
-		insn->offset < sym->offset + sym->len;			\
-	     insn = list_next_entry(insn, list))
+	     insn && insn->offset < sym->offset + sym->len;		\
+	     insn = next_insn_same_sec(file, insn))
 
 #define sym_for_each_insn_continue_reverse(file, sym, insn)		\
-	for (insn = list_prev_entry(insn, list);			\
-	     &insn->list != &file->insn_list &&				\
-		insn->sec == sym->sec && insn->offset >= sym->offset;	\
-	     insn = list_prev_entry(insn, list))
+	for (insn = prev_insn_same_sec(file, insn);			\
+	     insn && insn->offset >= sym->offset;			\
+	     insn = prev_insn_same_sec(file, insn))
 
 #define sec_for_each_insn_from(file, insn)				\
 	for (; insn; insn = next_insn_same_sec(file, insn))
@@ -384,6 +401,9 @@ static int decode_instructions(struct objtool_file *file)
 	int ret;
 
 	for_each_sec(file, sec) {
+		struct instruction *insns = NULL;
+		u8 prev_len = 0;
+		u8 idx = 0;
 
 		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
 			continue;
@@ -409,22 +429,31 @@ static int decode_instructions(struct objtool_file *file)
 			sec->init = true;
 
 		for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
-			insn = malloc(sizeof(*insn));
-			if (!insn) {
-				WARN("malloc failed");
-				return -1;
+			if (!insns || idx == INSN_CHUNK_MAX) {
+				insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
+				if (!insns) {
+					WARN("malloc failed");
+					return -1;
+				}
+				idx = 0;
+			} else {
+				idx++;
 			}
-			memset(insn, 0, sizeof(*insn));
-			INIT_LIST_HEAD(&insn->call_node);
+			insn = &insns[idx];
+			insn->idx = idx;
 
+			INIT_LIST_HEAD(&insn->call_node);
 			insn->sec = sec;
 			insn->offset = offset;
+			insn->prev_len = prev_len;
 
 			ret = arch_decode_instruction(file, sec, offset,
 						      sec->sh.sh_size - offset,
 						      insn);
 			if (ret)
-				goto err;
+				return ret;
+
+			prev_len = insn->len;
 
 			/*
 			 * By default, "ud2" is a dead end unless otherwise
@@ -435,10 +464,11 @@ static int decode_instructions(struct objtool_file *file)
 				insn->dead_end = true;
 
 			hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
-			list_add_tail(&insn->list, &file->insn_list);
 			nr_insns++;
 		}
 
+//		printf("%s: last chunk used: %d\n", sec->name, (int)idx);
+
 		list_for_each_entry(func, &sec->symbol_list, list) {
 			if (func->type != STT_NOTYPE && func->type != STT_FUNC)
 				continue;
@@ -481,10 +511,6 @@ static int decode_instructions(struct objtool_file *file)
 		printf("nr_insns: %lu\n", nr_insns);
 
 	return 0;
-
-err:
-	free(insn);
-	return ret;
 }
 
 /*
@@ -599,7 +625,7 @@ static int add_dead_ends(struct objtool_file *file)
 		}
 		insn = find_insn(file, reloc->sym->sec, reloc->addend);
 		if (insn)
-			insn = list_prev_entry(insn, list);
+			insn = prev_insn_same_sec(file, insn);
 		else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
 			insn = find_last_insn(file, reloc->sym->sec);
 			if (!insn) {
@@ -634,7 +660,7 @@ reachable:
 		}
 		insn = find_insn(file, reloc->sym->sec, reloc->addend);
 		if (insn)
-			insn = list_prev_entry(insn, list);
+			insn = prev_insn_same_sec(file, insn);
 		else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
 			insn = find_last_insn(file, reloc->sym->sec);
 			if (!insn) {
@@ -1775,6 +1801,7 @@ static int handle_group_alt(struct objtool_file *file,
 		orig_alt_group->orig_group = NULL;
 		orig_alt_group->first_insn = orig_insn;
 		orig_alt_group->last_insn = last_orig_insn;
+		orig_alt_group->nop = NULL;
 	} else {
 		if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
 		    orig_alt_group->first_insn->offset != special_alt->orig_len) {
@@ -1876,12 +1903,11 @@ static int handle_group_alt(struct objtool_file *file,
 		return -1;
 	}
 
-	if (nop)
-		list_add(&nop->list, &last_new_insn->list);
 end:
 	new_alt_group->orig_group = orig_alt_group;
 	new_alt_group->first_insn = *new_insn;
-	new_alt_group->last_insn = nop ? : last_new_insn;
+	new_alt_group->last_insn = last_new_insn;
+	new_alt_group->nop = nop;
 	new_alt_group->cfi = orig_alt_group->cfi;
 	return 0;
 }
@@ -1931,7 +1957,7 @@ static int handle_jump_alt(struct objtool_file *file,
 	else
 		file->jl_long++;
 
-	*new_insn = list_next_entry(orig_insn, list);
+	*new_insn = next_insn_same_sec(file, orig_insn);
 	return 0;
 }
 
@@ -3522,11 +3548,28 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file,
 	 * Simulate the fact that alternatives are patched in-place.  When the
 	 * end of a replacement alt_group is reached, redirect objtool flow to
 	 * the end of the original alt_group.
+	 *
+	 * insn->alts->insn -> alt_group->first_insn
+	 *		       ...
+	 *		       alt_group->last_insn
+	 *		       [alt_group->nop]      -> next(orig_group->last_insn)
 	 */
-	if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
-		return next_insn_same_sec(file, alt_group->orig_group->last_insn);
+	if (alt_group) {
+		if (alt_group->nop) {
+			/* ->nop implies ->orig_group */
+			if (insn == alt_group->last_insn)
+				return alt_group->nop;
+			if (insn == alt_group->nop)
+				goto next_orig;
+		}
+		if (insn == alt_group->last_insn && alt_group->orig_group)
+			goto next_orig;
+	}
 
 	return next_insn_same_sec(file, insn);
+
+next_orig:
+	return next_insn_same_sec(file, alt_group->orig_group->last_insn);
 }
 
 /*
@@ -3777,11 +3820,25 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 	return 0;
 }
 
+static int validate_unwind_hint(struct objtool_file *file,
+				  struct instruction *insn,
+				  struct insn_state *state)
+{
+	if (insn->hint && !insn->visited && !insn->ignore) {
+		int ret = validate_branch(file, insn_func(insn), insn, *state);
+		if (ret && opts.backtrace)
+			BT_FUNC("<=== (hint)", insn);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
 {
 	struct instruction *insn;
 	struct insn_state state;
-	int ret, warnings = 0;
+	int warnings = 0;
 
 	if (!file->hints)
 		return 0;
@@ -3789,22 +3846,11 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
 	init_insn_state(file, &state, sec);
 
 	if (sec) {
-		insn = find_insn(file, sec, 0);
-		if (!insn)
-			return 0;
+		sec_for_each_insn(file, sec, insn)
+			warnings += validate_unwind_hint(file, insn, &state);
 	} else {
-		insn = list_first_entry(&file->insn_list, typeof(*insn), list);
-	}
-
-	while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
-		if (insn->hint && !insn->visited && !insn->ignore) {
-			ret = validate_branch(file, insn_func(insn), insn, state);
-			if (ret && opts.backtrace)
-				BT_FUNC("<=== (hint)", insn);
-			warnings += ret;
-		}
-
-		insn = list_next_entry(insn, list);
+		for_each_insn(file, insn)
+			warnings += validate_unwind_hint(file, insn, &state);
 	}
 
 	return warnings;
@@ -4070,7 +4116,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	 *
 	 * It may also insert a UD2 after calling a __noreturn function.
 	 */
-	prev_insn = list_prev_entry(insn, list);
+	prev_insn = prev_insn_same_sec(file, insn);
 	if ((prev_insn->dead_end ||
 	     dead_end_function(file, insn_call_dest(prev_insn))) &&
 	    (insn->type == INSN_BUG ||
@@ -4102,7 +4148,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 		if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len)
 			break;
 
-		insn = list_next_entry(insn, list);
+		insn = next_insn_same_sec(file, insn);
 	}
 
 	return false;
@@ -4115,10 +4161,10 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func,
 		return 0;
 
 	for (;;) {
-		struct instruction *prev = list_prev_entry(insn, list);
+		struct instruction *prev = prev_insn_same_sec(file, insn);
 		u64 offset;
 
-		if (&prev->list == &file->insn_list)
+		if (!prev)
 			break;
 
 		if (prev->type != INSN_NOP)
@@ -4517,7 +4563,7 @@ int check(struct objtool_file *file)
 
 	warnings += ret;
 
-	if (list_empty(&file->insn_list))
+	if (!nr_insns)
 		goto out;
 
 	if (opts.retpoline) {
@@ -4626,7 +4672,7 @@ int check(struct objtool_file *file)
 		warnings += ret;
 	}
 
-	if (opts.orc && !list_empty(&file->insn_list)) {
+	if (opts.orc && nr_insns) {
 		ret = orc_create(file);
 		if (ret < 0)
 			goto out;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index ab6deae..3e7c700 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -27,7 +27,7 @@ struct alt_group {
 	struct alt_group *orig_group;
 
 	/* First and last instructions in the group */
-	struct instruction *first_insn, *last_insn;
+	struct instruction *first_insn, *last_insn, *nop;
 
 	/*
 	 * Byte-offset-addressed len-sized array of pointers to CFI structs.
@@ -36,31 +36,36 @@ struct alt_group {
 	struct cfi_state **cfi;
 };
 
+#define INSN_CHUNK_BITS		8
+#define INSN_CHUNK_SIZE		(1 << INSN_CHUNK_BITS)
+#define INSN_CHUNK_MAX		(INSN_CHUNK_SIZE - 1)
+
 struct instruction {
-	struct list_head list;
 	struct hlist_node hash;
 	struct list_head call_node;
 	struct section *sec;
 	unsigned long offset;
 	unsigned long immediate;
-	unsigned int len;
-	u8 type;
-
-	u16 dead_end		: 1,
-	   ignore		: 1,
-	   ignore_alts		: 1,
-	   hint			: 1,
-	   save			: 1,
-	   restore		: 1,
-	   retpoline_safe	: 1,
-	   noendbr		: 1,
-	   entry		: 1,
-	   visited		: 4,
-	   no_reloc		: 1;
-		/* 2 bit hole */
 
+	u8 len;
+	u8 prev_len;
+	u8 type;
 	s8 instr;
 
+	u32 idx			: INSN_CHUNK_BITS,
+	    dead_end		: 1,
+	    ignore		: 1,
+	    ignore_alts		: 1,
+	    hint		: 1,
+	    save		: 1,
+	    restore		: 1,
+	    retpoline_safe	: 1,
+	    noendbr		: 1,
+	    entry		: 1,
+	    visited		: 4,
+	    no_reloc		: 1;
+		/* 10 bit hole */
+
 	struct alt_group *alt_group;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
@@ -109,13 +114,11 @@ static inline bool is_jump(struct instruction *insn)
 struct instruction *find_insn(struct objtool_file *file,
 			      struct section *sec, unsigned long offset);
 
-#define for_each_insn(file, insn)					\
-	list_for_each_entry(insn, &file->insn_list, list)
+struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruction *insn);
 
-#define sec_for_each_insn(file, sec, insn)				\
-	for (insn = find_insn(file, sec, 0);				\
-	     insn && &insn->list != &file->insn_list &&			\
-			insn->sec == sec;				\
-	     insn = list_next_entry(insn, list))
+#define sec_for_each_insn(file, _sec, insn)				\
+	for (insn = find_insn(file, _sec, 0);				\
+	     insn && insn->sec == _sec;					\
+	     insn = next_insn_same_sec(file, insn))
 
 #endif /* _CHECK_H */
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index 6b40977..94a33ee 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -21,7 +21,6 @@ struct pv_state {
 
 struct objtool_file {
 	struct elf *elf;
-	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 20);
 	struct list_head retpoline_call_list;
 	struct list_head return_thunk_list;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 6affd80..c54f723 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -99,7 +99,6 @@ struct objtool_file *objtool_open_read(const char *_objname)
 		return NULL;
 	}
 
-	INIT_LIST_HEAD(&file.insn_list);
 	hash_init(file.insn_hash);
 	INIT_LIST_HEAD(&file.retpoline_call_list);
 	INIT_LIST_HEAD(&file.return_thunk_list);

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Shrink instruction::{type,visited}
  2023-02-08 17:18 ` [PATCH 04/10] objtool: Shrink instruction::{type,visited} Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     8b2de412158ecdb312c707918432e6650df808cc
Gitweb:        https://git.kernel.org/tip/8b2de412158ecdb312c707918432e6650df808cc
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:00 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:21:12 +01:00

objtool: Shrink instruction::{type,visited}

Since we don't have that many types in enum insn_type, force it into a
u8 and re-arrange member to get rid of the holes, saves another 8
bytes.

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
-	unsigned int               len;                  /*    64     4 */
-	enum insn_type             type;                 /*    68     4 */
-	long unsigned int          immediate;            /*    72     8 */
-	u16                        dead_end:1;           /*    80: 0  2 */
-	u16                        ignore:1;             /*    80: 1  2 */
-	u16                        ignore_alts:1;        /*    80: 2  2 */
-	u16                        hint:1;               /*    80: 3  2 */
-	u16                        save:1;               /*    80: 4  2 */
-	u16                        restore:1;            /*    80: 5  2 */
-	u16                        retpoline_safe:1;     /*    80: 6  2 */
-	u16                        noendbr:1;            /*    80: 7  2 */
-	u16                        entry:1;              /*    80: 8  2 */
+	long unsigned int          immediate;            /*    64     8 */
+	unsigned int               len;                  /*    72     4 */
+	u8                         type;                 /*    76     1 */

-	/* XXX 7 bits hole, try to pack */
+	/* Bitfield combined with previous fields */

-	s8                         instr;                /*    82     1 */
-	u8                         visited;              /*    83     1 */
+	u16                        dead_end:1;           /*    76: 8  2 */
+	u16                        ignore:1;             /*    76: 9  2 */
+	u16                        ignore_alts:1;        /*    76:10  2 */
+	u16                        hint:1;               /*    76:11  2 */
+	u16                        save:1;               /*    76:12  2 */
+	u16                        restore:1;            /*    76:13  2 */
+	u16                        retpoline_safe:1;     /*    76:14  2 */
+	u16                        noendbr:1;            /*    76:15  2 */
+	u16                        entry:1;              /*    78: 0  2 */
+	u16                        visited:4;            /*    78: 1  2 */

-	/* XXX 4 bytes hole, try to pack */
+	/* XXX 3 bits hole, try to pack */
+	/* Bitfield combined with next fields */

-	struct alt_group *         alt_group;            /*    88     8 */
-	struct symbol *            call_dest;            /*    96     8 */
-	struct instruction *       jump_dest;            /*   104     8 */
-	struct instruction *       first_jump_src;       /*   112     8 */
-	struct reloc *             jump_table;           /*   120     8 */
+	s8                         instr;                /*    79     1 */
+	struct alt_group *         alt_group;            /*    80     8 */
+	struct symbol *            call_dest;            /*    88     8 */
+	struct instruction *       jump_dest;            /*    96     8 */
+	struct instruction *       first_jump_src;       /*   104     8 */
+	struct reloc *             jump_table;           /*   112     8 */
+	struct reloc *             reloc;                /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct reloc *             reloc;                /*   128     8 */
-	struct alternative *       alts;                 /*   136     8 */
-	struct symbol *            sym;                  /*   144     8 */
-	struct stack_op *          stack_ops;            /*   152     8 */
-	struct cfi_state *         cfi;                  /*   160     8 */
+	struct alternative *       alts;                 /*   128     8 */
+	struct symbol *            sym;                  /*   136     8 */
+	struct stack_op *          stack_ops;            /*   144     8 */
+	struct cfi_state *         cfi;                  /*   152     8 */

-	/* size: 168, cachelines: 3, members: 29 */
-	/* sum members: 162, holes: 1, sum holes: 4 */
-	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 40 bytes */
+	/* size: 160, cachelines: 3, members: 29 */
+	/* sum members: 158 */
+	/* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
+	/* last cacheline: 32 bytes */
 };

pre:	5:48.86 real,   220.30 user,    128.34 sys,     24834672 mem
post:	5:48.89 real,   220.96 user,    127.55 sys,     24834672 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.501847188@infradead.org
---
 tools/objtool/include/objtool/check.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 7966f60..a497ee7 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -42,9 +42,9 @@ struct instruction {
 	struct list_head call_node;
 	struct section *sec;
 	unsigned long offset;
-	unsigned int len;
-	enum insn_type type;
 	unsigned long immediate;
+	unsigned int len;
+	u8 type;
 
 	u16 dead_end		: 1,
 	   ignore		: 1,
@@ -54,11 +54,11 @@ struct instruction {
 	   restore		: 1,
 	   retpoline_safe	: 1,
 	   noendbr		: 1,
-	   entry		: 1;
-		/* 7 bit hole */
+	   entry		: 1,
+	   visited		: 4;
+		/* 3 bit hole */
 
 	s8 instr;
-	u8 visited;
 
 	struct alt_group *alt_group;
 	struct symbol *call_dest;

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Remove instruction::reloc
  2023-02-08 17:18 ` [PATCH 05/10] objtool: Remove instruction::reloc Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     0932dbe1f5680481e612cafe0c7d0f1796f68612
Gitweb:        https://git.kernel.org/tip/0932dbe1f5680481e612cafe0c7d0f1796f68612
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:18:01 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:21:17 +01:00

objtool: Remove instruction::reloc

Instead of caching the reloc for each instruction, only keep a
negative cache of not having a reloc (by far the most common case).

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	long unsigned int          immediate;            /*    64     8 */
 	unsigned int               len;                  /*    72     4 */
 	u8                         type;                 /*    76     1 */

 	/* Bitfield combined with previous fields */

 	u16                        dead_end:1;           /*    76: 8  2 */
 	u16                        ignore:1;             /*    76: 9  2 */
 	u16                        ignore_alts:1;        /*    76:10  2 */
 	u16                        hint:1;               /*    76:11  2 */
 	u16                        save:1;               /*    76:12  2 */
 	u16                        restore:1;            /*    76:13  2 */
 	u16                        retpoline_safe:1;     /*    76:14  2 */
 	u16                        noendbr:1;            /*    76:15  2 */
 	u16                        entry:1;              /*    78: 0  2 */
 	u16                        visited:4;            /*    78: 1  2 */
+	u16                        no_reloc:1;           /*    78: 5  2 */

-	/* XXX 3 bits hole, try to pack */
+	/* XXX 2 bits hole, try to pack */
 	/* Bitfield combined with next fields */

 	s8                         instr;                /*    79     1 */
 	struct alt_group *         alt_group;            /*    80     8 */
 	struct symbol *            call_dest;            /*    88     8 */
 	struct instruction *       jump_dest;            /*    96     8 */
 	struct instruction *       first_jump_src;       /*   104     8 */
 	struct reloc *             jump_table;           /*   112     8 */
-	struct reloc *             reloc;                /*   120     8 */
+	struct alternative *       alts;                 /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
-	struct alternative *       alts;                 /*   128     8 */
-	struct symbol *            sym;                  /*   136     8 */
-	struct stack_op *          stack_ops;            /*   144     8 */
-	struct cfi_state *         cfi;                  /*   152     8 */
+	struct symbol *            sym;                  /*   128     8 */
+	struct stack_op *          stack_ops;            /*   136     8 */
+	struct cfi_state *         cfi;                  /*   144     8 */

-	/* size: 160, cachelines: 3, members: 29 */
-	/* sum members: 158 */
-	/* sum bitfield members: 13 bits, bit holes: 1, sum bit holes: 3 bits */
-	/* last cacheline: 32 bytes */
+	/* size: 152, cachelines: 3, members: 29 */
+	/* sum members: 150 */
+	/* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
+	/* last cacheline: 24 bytes */
 };

pre:	5:48.89 real,   220.96 user,    127.55 sys,     24834672 mem
post:	5:39.35 real,   215.58 user,    123.69 sys,     23448736 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.572145269@infradead.org
---
 tools/objtool/check.c                 | 24 +++++++++++-------------
 tools/objtool/include/objtool/check.h |  6 +++---
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9f83e85..6d0ce23 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1305,26 +1305,24 @@ __weak bool arch_is_rethunk(struct symbol *sym)
 	return false;
 }
 
-#define NEGATIVE_RELOC	((void *)-1L)
-
 static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
 {
-	if (insn->reloc == NEGATIVE_RELOC)
+	struct reloc *reloc;
+
+	if (insn->no_reloc)
 		return NULL;
 
-	if (!insn->reloc) {
-		if (!file)
-			return NULL;
+	if (!file)
+		return NULL;
 
-		insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-						       insn->offset, insn->len);
-		if (!insn->reloc) {
-			insn->reloc = NEGATIVE_RELOC;
-			return NULL;
-		}
+	reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+					 insn->offset, insn->len);
+	if (!reloc) {
+		insn->no_reloc = 1;
+		return NULL;
 	}
 
-	return insn->reloc;
+	return reloc;
 }
 
 static void remove_insn_ops(struct instruction *insn)
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index a497ee7..fffc8b8 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -55,8 +55,9 @@ struct instruction {
 	   retpoline_safe	: 1,
 	   noendbr		: 1,
 	   entry		: 1,
-	   visited		: 4;
-		/* 3 bit hole */
+	   visited		: 4,
+	   no_reloc		: 1;
+		/* 2 bit hole */
 
 	s8 instr;
 
@@ -65,7 +66,6 @@ struct instruction {
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
 	struct reloc *jump_table;
-	struct reloc *reloc;
 	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Make instruction::alts a single-linked list
  2023-02-08 17:17 ` [PATCH 03/10] objtool: Make instruction::alts " Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     d54066546121426ecd7ad01a53ae429c4e37a9d5
Gitweb:        https://git.kernel.org/tip/d54066546121426ecd7ad01a53ae429c4e37a9d5
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:17:59 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:21:06 +01:00

objtool: Make instruction::alts a single-linked list

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	unsigned int               len;                  /*    64     4 */
 	enum insn_type             type;                 /*    68     4 */
 	long unsigned int          immediate;            /*    72     8 */
 	u16                        dead_end:1;           /*    80: 0  2 */
 	u16                        ignore:1;             /*    80: 1  2 */
 	u16                        ignore_alts:1;        /*    80: 2  2 */
 	u16                        hint:1;               /*    80: 3  2 */
 	u16                        save:1;               /*    80: 4  2 */
 	u16                        restore:1;            /*    80: 5  2 */
 	u16                        retpoline_safe:1;     /*    80: 6  2 */
 	u16                        noendbr:1;            /*    80: 7  2 */
 	u16                        entry:1;              /*    80: 8  2 */

 	/* XXX 7 bits hole, try to pack */

 	s8                         instr;                /*    82     1 */
 	u8                         visited;              /*    83     1 */

 	/* XXX 4 bytes hole, try to pack */

 	struct alt_group *         alt_group;            /*    88     8 */
 	struct symbol *            call_dest;            /*    96     8 */
 	struct instruction *       jump_dest;            /*   104     8 */
 	struct instruction *       first_jump_src;       /*   112     8 */
 	struct reloc *             jump_table;           /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
 	struct reloc *             reloc;                /*   128     8 */
-	struct list_head           alts;                 /*   136    16 */
-	struct symbol *            sym;                  /*   152     8 */
-	struct stack_op *          stack_ops;            /*   160     8 */
-	struct cfi_state *         cfi;                  /*   168     8 */
+	struct alternative *       alts;                 /*   136     8 */
+	struct symbol *            sym;                  /*   144     8 */
+	struct stack_op *          stack_ops;            /*   152     8 */
+	struct cfi_state *         cfi;                  /*   160     8 */

-	/* size: 176, cachelines: 3, members: 29 */
-	/* sum members: 170, holes: 1, sum holes: 4 */
+	/* size: 168, cachelines: 3, members: 29 */
+	/* sum members: 162, holes: 1, sum holes: 4 */
 	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 48 bytes */
+	/* last cacheline: 40 bytes */
 };

pre:	5:58.50 real,   229.64 user,    128.65 sys,     26221520 mem
post:	5:48.86 real,   220.30 user,    128.34 sys,     24834672 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.430556498@infradead.org
---
 tools/objtool/check.c                 | 18 +++++++++---------
 tools/objtool/include/objtool/check.h |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8109d74..9f83e85 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -23,7 +23,7 @@
 #include <linux/static_call_types.h>
 
 struct alternative {
-	struct list_head list;
+	struct alternative *next;
 	struct instruction *insn;
 	bool skip_orig;
 };
@@ -397,7 +397,6 @@ static int decode_instructions(struct objtool_file *file)
 				return -1;
 			}
 			memset(insn, 0, sizeof(*insn));
-			INIT_LIST_HEAD(&insn->alts);
 			INIT_LIST_HEAD(&insn->call_node);
 
 			insn->sec = sec;
@@ -1780,7 +1779,6 @@ static int handle_group_alt(struct objtool_file *file,
 			return -1;
 		}
 		memset(nop, 0, sizeof(*nop));
-		INIT_LIST_HEAD(&nop->alts);
 
 		nop->sec = special_alt->new_sec;
 		nop->offset = special_alt->new_off + special_alt->new_len;
@@ -1978,7 +1976,8 @@ static int add_special_section_alts(struct objtool_file *file)
 		alt->insn = new_insn;
 		alt->skip_orig = special_alt->skip_orig;
 		orig_insn->ignore_alts |= special_alt->skip_alt;
-		list_add_tail(&alt->list, &orig_insn->alts);
+		alt->next = orig_insn->alts;
+		orig_insn->alts = alt;
 
 		list_del(&special_alt->list);
 		free(special_alt);
@@ -2037,7 +2036,8 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 		}
 
 		alt->insn = dest_insn;
-		list_add_tail(&alt->list, &insn->alts);
+		alt->next = insn->alts;
+		insn->alts = alt;
 		prev_offset = reloc->offset;
 	}
 
@@ -3594,10 +3594,10 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 		if (propagate_alt_cfi(file, insn))
 			return 1;
 
-		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+		if (!insn->ignore_alts && insn->alts) {
 			bool skip_orig = false;
 
-			list_for_each_entry(alt, &insn->alts, list) {
+			for (alt = insn->alts; alt; alt = alt->next) {
 				if (alt->skip_orig)
 					skip_orig = true;
 
@@ -3796,11 +3796,11 @@ static int validate_entry(struct objtool_file *file, struct instruction *insn)
 
 		insn->visited |= VISITED_ENTRY;
 
-		if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+		if (!insn->ignore_alts && insn->alts) {
 			struct alternative *alt;
 			bool skip_orig = false;
 
-			list_for_each_entry(alt, &insn->alts, list) {
+			for (alt = insn->alts; alt; alt = alt->next) {
 				if (alt->skip_orig)
 					skip_orig = true;
 
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 23e9819..7966f60 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -66,7 +66,7 @@ struct instruction {
 	struct instruction *first_jump_src;
 	struct reloc *jump_table;
 	struct reloc *reloc;
-	struct list_head alts;
+	struct alternative *alts;
 	struct symbol *sym;
 	struct stack_op *stack_ops;
 	struct cfi_state *cfi;

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Make instruction::stack_ops a single-linked list
  2023-02-08 17:17 ` [PATCH 02/10] objtool: Make instruction::stack_ops a single-linked list Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     3ee88df1b063962e39d7798ccc3b18fd10cea813
Gitweb:        https://git.kernel.org/tip/3ee88df1b063962e39d7798ccc3b18fd10cea813
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:17:58 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:20:59 +01:00

objtool: Make instruction::stack_ops a single-linked list

 struct instruction {
 	struct list_head           list;                 /*     0    16 */
 	struct hlist_node          hash;                 /*    16    16 */
 	struct list_head           call_node;            /*    32    16 */
 	struct section *           sec;                  /*    48     8 */
 	long unsigned int          offset;               /*    56     8 */
 	/* --- cacheline 1 boundary (64 bytes) --- */
 	unsigned int               len;                  /*    64     4 */
 	enum insn_type             type;                 /*    68     4 */
 	long unsigned int          immediate;            /*    72     8 */
 	u16                        dead_end:1;           /*    80: 0  2 */
 	u16                        ignore:1;             /*    80: 1  2 */
 	u16                        ignore_alts:1;        /*    80: 2  2 */
 	u16                        hint:1;               /*    80: 3  2 */
 	u16                        save:1;               /*    80: 4  2 */
 	u16                        restore:1;            /*    80: 5  2 */
 	u16                        retpoline_safe:1;     /*    80: 6  2 */
 	u16                        noendbr:1;            /*    80: 7  2 */
 	u16                        entry:1;              /*    80: 8  2 */

 	/* XXX 7 bits hole, try to pack */

 	s8                         instr;                /*    82     1 */
 	u8                         visited;              /*    83     1 */

 	/* XXX 4 bytes hole, try to pack */

 	struct alt_group *         alt_group;            /*    88     8 */
 	struct symbol *            call_dest;            /*    96     8 */
 	struct instruction *       jump_dest;            /*   104     8 */
 	struct instruction *       first_jump_src;       /*   112     8 */
 	struct reloc *             jump_table;           /*   120     8 */
 	/* --- cacheline 2 boundary (128 bytes) --- */
 	struct reloc *             reloc;                /*   128     8 */
 	struct list_head           alts;                 /*   136    16 */
 	struct symbol *            sym;                  /*   152     8 */
-	struct list_head           stack_ops;            /*   160    16 */
-	struct cfi_state *         cfi;                  /*   176     8 */
+	struct stack_op *          stack_ops;            /*   160     8 */
+	struct cfi_state *         cfi;                  /*   168     8 */

-	/* size: 184, cachelines: 3, members: 29 */
-	/* sum members: 178, holes: 1, sum holes: 4 */
+	/* size: 176, cachelines: 3, members: 29 */
+	/* sum members: 170, holes: 1, sum holes: 4 */
 	/* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 7 bits */
-	/* last cacheline: 56 bytes */
+	/* last cacheline: 48 bytes */
 };

pre:	5:58.22 real,   226.69 user,    131.22 sys,     26221520 mem
post:	5:58.50 real,   229.64 user,    128.65 sys,     26221520 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.362196959@infradead.org
---
 tools/objtool/arch/x86/decode.c       |  4 ++--
 tools/objtool/check.c                 | 11 +++++------
 tools/objtool/include/objtool/arch.h  |  2 +-
 tools/objtool/include/objtool/check.h |  2 +-
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index c5c4927..9ef024f 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -105,7 +105,7 @@ bool arch_pc_relative_reloc(struct reloc *reloc)
 #define ADD_OP(op) \
 	if (!(op = calloc(1, sizeof(*op)))) \
 		return -1; \
-	else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+	else for (*ops_list = op, ops_list = &op->next; op; op = NULL)
 
 /*
  * Helpers to decode ModRM/SIB:
@@ -148,7 +148,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			    unsigned long offset, unsigned int maxlen,
 			    struct instruction *insn)
 {
-	struct list_head *ops_list = &insn->stack_ops;
+	struct stack_op **ops_list = &insn->stack_ops;
 	const struct elf *elf = file->elf;
 	struct insn ins;
 	int x86_64, ret;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b3b423d..8109d74 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -398,7 +398,6 @@ static int decode_instructions(struct objtool_file *file)
 			}
 			memset(insn, 0, sizeof(*insn));
 			INIT_LIST_HEAD(&insn->alts);
-			INIT_LIST_HEAD(&insn->stack_ops);
 			INIT_LIST_HEAD(&insn->call_node);
 
 			insn->sec = sec;
@@ -1331,12 +1330,13 @@ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *i
 
 static void remove_insn_ops(struct instruction *insn)
 {
-	struct stack_op *op, *tmp;
+	struct stack_op *op, *next;
 
-	list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
-		list_del(&op->list);
+	for (op = insn->stack_ops; op; op = next) {
+		next = op->next;
 		free(op);
 	}
+	insn->stack_ops = NULL;
 }
 
 static void annotate_call_site(struct objtool_file *file,
@@ -1781,7 +1781,6 @@ static int handle_group_alt(struct objtool_file *file,
 		}
 		memset(nop, 0, sizeof(*nop));
 		INIT_LIST_HEAD(&nop->alts);
-		INIT_LIST_HEAD(&nop->stack_ops);
 
 		nop->sec = special_alt->new_sec;
 		nop->offset = special_alt->new_off + special_alt->new_len;
@@ -3226,7 +3225,7 @@ static int handle_insn_ops(struct instruction *insn,
 {
 	struct stack_op *op;
 
-	list_for_each_entry(op, &insn->stack_ops, list) {
+	for (op = insn->stack_ops; op; op = op->next) {
 
 		if (update_cfi_state(insn, next_insn, &state->cfi, op))
 			return 1;
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 73149f8..2b6d2ce 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -62,9 +62,9 @@ struct op_src {
 };
 
 struct stack_op {
+	struct stack_op *next;
 	struct op_dest dest;
 	struct op_src src;
-	struct list_head list;
 };
 
 struct instruction;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index acd7fae..23e9819 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -68,7 +68,7 @@ struct instruction {
 	struct reloc *reloc;
 	struct list_head alts;
 	struct symbol *sym;
-	struct list_head stack_ops;
+	struct stack_op *stack_ops;
 	struct cfi_state *cfi;
 };
 

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip: objtool/core] objtool: Change arch_decode_instruction() signature
  2023-02-08 17:17 ` [PATCH 01/10] objtool: Change arch_decode_instruction() signature Peter Zijlstra
  2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
@ 2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-02-23  8:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel),
	Ingo Molnar, Josh Poimboeuf, Nathan Chancellor, linux, x86,
	linux-kernel

The following commit has been merged into the objtool/core branch of tip:

Commit-ID:     20a554638dd2665a88d3d68a68f7981480a27f36
Gitweb:        https://git.kernel.org/tip/20a554638dd2665a88d3d68a68f7981480a27f36
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 08 Feb 2023 18:17:57 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 23 Feb 2023 09:20:50 +01:00

objtool: Change arch_decode_instruction() signature

In preparation to changing struct instruction around a bit, avoid
passing it's members by pointer and instead pass the whole thing.

A cleanup in it's own right too.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.291087549@infradead.org
---
 tools/objtool/arch/powerpc/decode.c  |  22 ++---
 tools/objtool/arch/x86/decode.c      | 105 ++++++++++++--------------
 tools/objtool/check.c                |   4 +-
 tools/objtool/include/objtool/arch.h |   4 +-
 4 files changed, 64 insertions(+), 71 deletions(-)

diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index 9c65380..53b5569 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -41,38 +41,36 @@ const char *arch_ret_insn(int len)
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list)
+			    struct instruction *insn)
 {
 	unsigned int opcode;
 	enum insn_type typ;
 	unsigned long imm;
-	u32 insn;
+	u32 ins;
 
-	insn = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
-	opcode = insn >> 26;
+	ins = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
+	opcode = ins >> 26;
 	typ = INSN_OTHER;
 	imm = 0;
 
 	switch (opcode) {
 	case 18: /* b[l][a] */
-		if ((insn & 3) == 1) /* bl */
+		if ((ins & 3) == 1) /* bl */
 			typ = INSN_CALL;
 
-		imm = insn & 0x3fffffc;
+		imm = ins & 0x3fffffc;
 		if (imm & 0x2000000)
 			imm -= 0x4000000;
 		break;
 	}
 
 	if (opcode == 1)
-		*len = 8;
+		insn->len = 8;
 	else
-		*len = 4;
+		insn->len = 4;
 
-	*type = typ;
-	*immediate = imm;
+	insn->type = typ;
+	insn->immediate = imm;
 
 	return 0;
 }
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index e7b030f..c5c4927 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -146,12 +146,11 @@ static bool has_notrack_prefix(struct insn *insn)
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list)
+			    struct instruction *insn)
 {
+	struct list_head *ops_list = &insn->stack_ops;
 	const struct elf *elf = file->elf;
-	struct insn insn;
+	struct insn ins;
 	int x86_64, ret;
 	unsigned char op1, op2, op3, prefix,
 		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
@@ -165,42 +164,42 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	if (x86_64 == -1)
 		return -1;
 
-	ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen,
+	ret = insn_decode(&ins, sec->data->d_buf + offset, maxlen,
 			  x86_64 ? INSN_MODE_64 : INSN_MODE_32);
 	if (ret < 0) {
 		WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
 		return -1;
 	}
 
-	*len = insn.length;
-	*type = INSN_OTHER;
+	insn->len = ins.length;
+	insn->type = INSN_OTHER;
 
-	if (insn.vex_prefix.nbytes)
+	if (ins.vex_prefix.nbytes)
 		return 0;
 
-	prefix = insn.prefixes.bytes[0];
+	prefix = ins.prefixes.bytes[0];
 
-	op1 = insn.opcode.bytes[0];
-	op2 = insn.opcode.bytes[1];
-	op3 = insn.opcode.bytes[2];
+	op1 = ins.opcode.bytes[0];
+	op2 = ins.opcode.bytes[1];
+	op3 = ins.opcode.bytes[2];
 
-	if (insn.rex_prefix.nbytes) {
-		rex = insn.rex_prefix.bytes[0];
+	if (ins.rex_prefix.nbytes) {
+		rex = ins.rex_prefix.bytes[0];
 		rex_w = X86_REX_W(rex) >> 3;
 		rex_r = X86_REX_R(rex) >> 2;
 		rex_x = X86_REX_X(rex) >> 1;
 		rex_b = X86_REX_B(rex);
 	}
 
-	if (insn.modrm.nbytes) {
-		modrm = insn.modrm.bytes[0];
+	if (ins.modrm.nbytes) {
+		modrm = ins.modrm.bytes[0];
 		modrm_mod = X86_MODRM_MOD(modrm);
 		modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
 		modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
 	}
 
-	if (insn.sib.nbytes) {
-		sib = insn.sib.bytes[0];
+	if (ins.sib.nbytes) {
+		sib = ins.sib.bytes[0];
 		/* sib_scale = X86_SIB_SCALE(sib); */
 		sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
 		sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
@@ -254,7 +253,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 
 	case 0x70 ... 0x7f:
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0x80 ... 0x83:
@@ -278,7 +277,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		if (!rm_is_reg(CFI_SP))
 			break;
 
-		imm = insn.immediate.value;
+		imm = ins.immediate.value;
 		if (op1 & 2) { /* sign extend */
 			if (op1 & 1) { /* imm32 */
 				imm <<= 32;
@@ -309,7 +308,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			ADD_OP(op) {
 				op->src.type = OP_SRC_AND;
 				op->src.reg = CFI_SP;
-				op->src.offset = insn.immediate.value;
+				op->src.offset = ins.immediate.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = CFI_SP;
 			}
@@ -356,7 +355,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 					op->src.reg = CFI_SP;
 					op->dest.type = OP_DEST_REG_INDIRECT;
 					op->dest.reg = modrm_rm;
-					op->dest.offset = insn.displacement.value;
+					op->dest.offset = ins.displacement.value;
 				}
 				break;
 			}
@@ -389,7 +388,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG_INDIRECT;
 				op->dest.reg = CFI_BP;
-				op->dest.offset = insn.displacement.value;
+				op->dest.offset = ins.displacement.value;
 			}
 			break;
 		}
@@ -402,7 +401,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG_INDIRECT;
 				op->dest.reg = CFI_SP;
-				op->dest.offset = insn.displacement.value;
+				op->dest.offset = ins.displacement.value;
 			}
 			break;
 		}
@@ -419,7 +418,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_BP;
-				op->src.offset = insn.displacement.value;
+				op->src.offset = ins.displacement.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = modrm_reg;
 			}
@@ -432,7 +431,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG_INDIRECT;
 				op->src.reg = CFI_SP;
-				op->src.offset = insn.displacement.value;
+				op->src.offset = ins.displacement.value;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = modrm_reg;
 			}
@@ -464,7 +463,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 		/* lea disp(%src), %dst */
 		ADD_OP(op) {
-			op->src.offset = insn.displacement.value;
+			op->src.offset = ins.displacement.value;
 			if (!op->src.offset) {
 				/* lea (%src), %dst */
 				op->src.type = OP_SRC_REG;
@@ -487,7 +486,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 
 	case 0x90:
-		*type = INSN_NOP;
+		insn->type = INSN_NOP;
 		break;
 
 	case 0x9c:
@@ -511,39 +510,39 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		if (op2 == 0x01) {
 
 			if (modrm == 0xca)
-				*type = INSN_CLAC;
+				insn->type = INSN_CLAC;
 			else if (modrm == 0xcb)
-				*type = INSN_STAC;
+				insn->type = INSN_STAC;
 
 		} else if (op2 >= 0x80 && op2 <= 0x8f) {
 
-			*type = INSN_JUMP_CONDITIONAL;
+			insn->type = INSN_JUMP_CONDITIONAL;
 
 		} else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
 			   op2 == 0x35) {
 
 			/* sysenter, sysret */
-			*type = INSN_CONTEXT_SWITCH;
+			insn->type = INSN_CONTEXT_SWITCH;
 
 		} else if (op2 == 0x0b || op2 == 0xb9) {
 
 			/* ud2 */
-			*type = INSN_BUG;
+			insn->type = INSN_BUG;
 
 		} else if (op2 == 0x0d || op2 == 0x1f) {
 
 			/* nopl/nopw */
-			*type = INSN_NOP;
+			insn->type = INSN_NOP;
 
 		} else if (op2 == 0x1e) {
 
 			if (prefix == 0xf3 && (modrm == 0xfa || modrm == 0xfb))
-				*type = INSN_ENDBR;
+				insn->type = INSN_ENDBR;
 
 
 		} else if (op2 == 0x38 && op3 == 0xf8) {
-			if (insn.prefixes.nbytes == 1 &&
-			    insn.prefixes.bytes[0] == 0xf2) {
+			if (ins.prefixes.nbytes == 1 &&
+			    ins.prefixes.bytes[0] == 0xf2) {
 				/* ENQCMD cannot be used in the kernel. */
 				WARN("ENQCMD instruction at %s:%lx", sec->name,
 				     offset);
@@ -591,29 +590,29 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 	case 0xcc:
 		/* int3 */
-		*type = INSN_TRAP;
+		insn->type = INSN_TRAP;
 		break;
 
 	case 0xe3:
 		/* jecxz/jrcxz */
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0xe9:
 	case 0xeb:
-		*type = INSN_JUMP_UNCONDITIONAL;
+		insn->type = INSN_JUMP_UNCONDITIONAL;
 		break;
 
 	case 0xc2:
 	case 0xc3:
-		*type = INSN_RETURN;
+		insn->type = INSN_RETURN;
 		break;
 
 	case 0xc7: /* mov imm, r/m */
 		if (!opts.noinstr)
 			break;
 
-		if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
+		if (ins.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
 			struct reloc *immr, *disp;
 			struct symbol *func;
 			int idx;
@@ -661,17 +660,17 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 	case 0xca: /* retf */
 	case 0xcb: /* retf */
-		*type = INSN_CONTEXT_SWITCH;
+		insn->type = INSN_CONTEXT_SWITCH;
 		break;
 
 	case 0xe0: /* loopne */
 	case 0xe1: /* loope */
 	case 0xe2: /* loop */
-		*type = INSN_JUMP_CONDITIONAL;
+		insn->type = INSN_JUMP_CONDITIONAL;
 		break;
 
 	case 0xe8:
-		*type = INSN_CALL;
+		insn->type = INSN_CALL;
 		/*
 		 * For the impact on the stack, a CALL behaves like
 		 * a PUSH of an immediate value (the return address).
@@ -683,30 +682,30 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 
 	case 0xfc:
-		*type = INSN_CLD;
+		insn->type = INSN_CLD;
 		break;
 
 	case 0xfd:
-		*type = INSN_STD;
+		insn->type = INSN_STD;
 		break;
 
 	case 0xff:
 		if (modrm_reg == 2 || modrm_reg == 3) {
 
-			*type = INSN_CALL_DYNAMIC;
-			if (has_notrack_prefix(&insn))
+			insn->type = INSN_CALL_DYNAMIC;
+			if (has_notrack_prefix(&ins))
 				WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
 
 		} else if (modrm_reg == 4) {
 
-			*type = INSN_JUMP_DYNAMIC;
-			if (has_notrack_prefix(&insn))
+			insn->type = INSN_JUMP_DYNAMIC;
+			if (has_notrack_prefix(&ins))
 				WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
 
 		} else if (modrm_reg == 5) {
 
 			/* jmpf */
-			*type = INSN_CONTEXT_SWITCH;
+			insn->type = INSN_CONTEXT_SWITCH;
 
 		} else if (modrm_reg == 6) {
 
@@ -723,7 +722,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 	}
 
-	*immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
+	insn->immediate = ins.immediate.nbytes ? ins.immediate.value : 0;
 
 	return 0;
 }
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index ba07a8e..b3b423d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -406,9 +406,7 @@ static int decode_instructions(struct objtool_file *file)
 
 			ret = arch_decode_instruction(file, sec, offset,
 						      sec->sh.sh_size - offset,
-						      &insn->len, &insn->type,
-						      &insn->immediate,
-						      &insn->stack_ops);
+						      insn);
 			if (ret)
 				goto err;
 
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 4ecb480..73149f8 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -75,9 +75,7 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state);
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, enum insn_type *type,
-			    unsigned long *immediate,
-			    struct list_head *ops_list);
+			    struct instruction *insn);
 
 bool arch_callee_saved_reg(unsigned char reg);
 

^ permalink raw reply related	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2023-02-23  8:32 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-08 17:17 [PATCH 00/10] objtool: Honey, I shrunk the instruction Peter Zijlstra
2023-02-08 17:17 ` [PATCH 01/10] objtool: Change arch_decode_instruction() signature Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:17 ` [PATCH 02/10] objtool: Make instruction::stack_ops a single-linked list Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:17 ` [PATCH 03/10] objtool: Make instruction::alts " Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:18 ` [PATCH 04/10] objtool: Shrink instruction::{type,visited} Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:18 ` [PATCH 05/10] objtool: Remove instruction::reloc Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:18 ` [PATCH 06/10] objtool: Union instruction::{call_dest,jump_table} Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:18 ` [PATCH 07/10] objtool: Fix overlapping alternatives Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:18 ` [PATCH 08/10] x86: Fix FILL_RETURN_BUFFER Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:18 ` [PATCH 09/10] objtool: Remove instruction::list Peter Zijlstra
2023-02-13 11:10   ` [tip: objtool/core] " tip-bot2 for Peter Zijlstra
2023-02-23  8:32   ` tip-bot2 for Peter Zijlstra
2023-02-08 17:18 ` [PATCH 10/10][HACK] objtool: Shrink reloc Peter Zijlstra
2023-02-08 18:35 ` [PATCH 00/10] objtool: Honey, I shrunk the instruction Nathan Chancellor
2023-02-08 20:22 ` Damian Tometzki
2023-02-09 10:22   ` Peter Zijlstra
2023-02-09 10:56     ` Damian Tometzki
2023-02-09 19:57 ` Josh Poimboeuf
2023-02-10  8:59   ` Peter Zijlstra
2023-02-10 13:47     ` [PATCH 00/10] objtool: Honey, I shrunk the instruction^Wreloc Peter Zijlstra
2023-02-10  1:50 ` [PATCH 00/10] objtool: Honey, I shrunk the instruction Thomas Weißschuh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.