linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "tip-bot2 for Peter Zijlstra" <tip-bot2@linutronix.de>
To: linux-tip-commits@vger.kernel.org
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>,
	Borislav Petkov <bp@suse.de>,
	x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [tip: x86/core] x86/alternatives: Optimize optimize_nops()
Date: Thu, 01 Apr 2021 15:08:58 -0000	[thread overview]
Message-ID: <161728973897.29796.8946571512222425365.tip-bot2@tip-bot2> (raw)
In-Reply-To: <20210326151259.442992235@infradead.org>

The following commit has been merged into the x86/core branch of tip:

Commit-ID:     b4da5166b084f3fac01d68e0e67cbf3bf78a3e12
Gitweb:        https://git.kernel.org/tip/b4da5166b084f3fac01d68e0e67cbf3bf78a3e12
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 26 Mar 2021 16:12:01 +01:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 31 Mar 2021 20:30:04 +02:00

x86/alternatives: Optimize optimize_nops()

Currently, optimize_nops() scans to see if the alternative starts with
NOPs. However, the emit pattern is:

  141:	\oldinstr
  142:	.skip (len-(142b-141b)), 0x90

That is, when oldinstr is short, the tail is padded with NOPs. This case
never gets optimized.

Rewrite optimize_nops() to replace any trailing string of NOPs inside
the alternative to larger NOPs. Also run it irrespective of patching,
replacing NOPs in both the original and replaced code.

A direct consequence is that padlen becomes superfluous, so remove it.

 [ bp:
   - Adjust commit message
   - remove a stale comment about needing to pad
   - add a comment in optimize_nops()
   - exit early if the NOP verif. loop catches a mismatch - function
     should not not add NOPs in that case
   - fix the "optimized NOPs" offsets output ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210326151259.442992235@infradead.org
---
 arch/x86/include/asm/alternative.h            | 17 +-----
 arch/x86/kernel/alternative.c                 | 49 +++++++++++-------
 tools/objtool/arch/x86/include/arch/special.h |  2 +-
 3 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 17b3609..a3c2315 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,7 +65,6 @@ struct alt_instr {
 	u16 cpuid;		/* cpuid bit set for replacement */
 	u8  instrlen;		/* length of original instruction */
 	u8  replacementlen;	/* length of new instruction */
-	u8  padlen;		/* length of build-time padding */
 } __packed;
 
 /*
@@ -104,7 +103,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
 
 #define alt_end_marker		"663"
 #define alt_slen		"662b-661b"
-#define alt_pad_len		alt_end_marker"b-662b"
 #define alt_total_slen		alt_end_marker"b-661b"
 #define alt_rlen(num)		e_replacement(num)"f-"b_replacement(num)"f"
 
@@ -151,8 +149,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
 	" .word " __stringify(feature) "\n"		/* feature bit     */ \
 	" .byte " alt_total_slen "\n"			/* source len      */ \
-	" .byte " alt_rlen(num) "\n"			/* replacement len */ \
-	" .byte " alt_pad_len "\n"			/* pad len */
+	" .byte " alt_rlen(num) "\n"			/* replacement len */
 
 #define ALTINSTR_REPLACEMENT(newinstr, num)		/* replacement */	\
 	"# ALT: replacement " #num "\n"						\
@@ -224,9 +221,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
  * Peculiarities:
  * No memory clobber here.
  * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
  * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
@@ -315,13 +309,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
  * enough information for the alternatives patching code to patch an
  * instruction. See apply_alternatives().
  */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+.macro altinstruction_entry orig alt feature orig_len alt_len
 	.long \orig - .
 	.long \alt - .
 	.word \feature
 	.byte \orig_len
 	.byte \alt_len
-	.byte \pad_len
 .endm
 
 /*
@@ -338,7 +331,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
 142:
 
 	.pushsection .altinstructions,"a"
-	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
 	.popsection
 
 	.pushsection .altinstr_replacement,"ax"
@@ -375,8 +368,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
 142:
 
 	.pushsection .altinstructions,"a"
-	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
-	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
 	.popsection
 
 	.pushsection .altinstr_replacement,"ax"
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f902f28..1298d58 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -345,19 +345,35 @@ done:
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
 	unsigned long flags;
-	int i;
+	struct insn insn;
+	int nop, i = 0;
+
+	/*
+	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
+	 * NOPs, optimize them.
+	 */
+	for (;;) {
+		if (insn_decode_kernel(&insn, &instr[i]))
+			return;
+
+		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+			break;
+
+		if ((i += insn.length) >= a->instrlen)
+			return;
+	}
 
-	for (i = 0; i < a->padlen; i++) {
-		if (instr[i] != 0x90)
+	for (nop = i; i < a->instrlen; i++) {
+		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
 			return;
 	}
 
 	local_irq_save(flags);
-	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+	add_nops(instr + nop, i - nop);
 	local_irq_restore(flags);
 
 	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, a->instrlen - a->padlen, a->padlen);
+		   instr, nop, a->instrlen);
 }
 
 /*
@@ -403,19 +419,15 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		 * - feature not present but ALTINSTR_FLAG_INV is set to mean,
 		 *   patch if feature is *NOT* present.
 		 */
-		if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
-			if (a->padlen > 1)
-				optimize_nops(a, instr);
-
-			continue;
-		}
+		if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
+			goto next;
 
-		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
+		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
 			(a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
 			feature >> 5,
 			feature & 0x1f,
 			instr, instr, a->instrlen,
-			replacement, a->replacementlen, a->padlen);
+			replacement, a->replacementlen);
 
 		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
 		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
@@ -439,14 +451,15 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		if (a->replacementlen && is_jmp(replacement[0]))
 			recompute_jump(a, instr, replacement, insn_buff);
 
-		if (a->instrlen > a->replacementlen) {
-			add_nops(insn_buff + a->replacementlen,
-				 a->instrlen - a->replacementlen);
-			insn_buff_sz += a->instrlen - a->replacementlen;
-		}
+		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
+			insn_buff[insn_buff_sz] = 0x90;
+
 		DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
 
 		text_poke_early(instr, insn_buff, insn_buff_sz);
+
+next:
+		optimize_nops(a, instr);
 	}
 }
 
diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h
index d818b2b..14271cc 100644
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -10,7 +10,7 @@
 #define JUMP_ORIG_OFFSET	0
 #define JUMP_NEW_OFFSET		4
 
-#define ALT_ENTRY_SIZE		13
+#define ALT_ENTRY_SIZE		12
 #define ALT_ORIG_OFFSET		0
 #define ALT_NEW_OFFSET		4
 #define ALT_FEATURE_OFFSET	8

  reply	other threads:[~2021-04-01 18:01 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-26 15:11 [PATCH v3 00/16] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 01/16] x86: Add insn_decode_kernel() Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 02/16] x86/alternatives: Optimize optimize_nops() Peter Zijlstra
2021-04-01 15:08   ` tip-bot2 for Peter Zijlstra [this message]
2021-04-03 11:11   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 03/16] x86/retpoline: Simplify retpolines Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-04-06  8:56     ` David Laight
2021-03-26 15:12 ` [PATCH v3 04/16] objtool: Correctly handle retpoline thunk calls Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 05/16] objtool: Per arch retpoline naming Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] objtool: Handle per " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 06/16] objtool: Fix static_call list generation Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 07/16] objtool: Rework rebuild_reloc logic Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` [tip: x86/core] objtool: Rework the elf_rebuild_reloc_section() logic tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 08/16] objtool: Add elf_create_reloc() helper Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 09/16] objtool: Implicitly create reloc sections Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` [tip: x86/core] objtool: Create reloc sections implicitly tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 10/16] objtool: Extract elf_strtab_concat() Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 11/16] objtool: Extract elf_symbol_add() Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 12/16] objtool: Add elf_create_undef_symbol() Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 13/16] objtool: Keep track of retpoline call sites Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 14/16] objtool: Cache instruction relocs Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 15/16] objtool: Skip magical retpoline .altinstr_replacement Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-26 15:12 ` [PATCH v3 16/16] objtool,x86: Rewrite retpoline thunk calls Peter Zijlstra
2021-03-29 16:38   ` Josh Poimboeuf
2021-06-02 15:51     ` Lukasz Majczak
2021-06-02 16:56       ` Peter Zijlstra
2021-06-02 17:10         ` Peter Zijlstra
2021-06-02 20:43       ` Josh Poimboeuf
2021-06-04 20:50       ` Nick Desaulniers
2021-06-04 23:27         ` Nick Desaulniers
2021-06-04 23:50           ` Fangrui Song
2021-06-05 10:38             ` Peter Zijlstra
2021-06-06  1:58               ` Fāng-ruì Sòng
2021-06-07  7:56                 ` Peter Zijlstra
2021-06-07  9:22                   ` Peter Zijlstra
2021-06-07  9:45                     ` Peter Zijlstra
2021-06-07 17:23                       ` Fāng-ruì Sòng
2021-06-07 18:25                         ` Peter Zijlstra
2021-06-07 20:54                       ` Nick Desaulniers
2021-06-08  9:56                         ` Peter Zijlstra
2021-06-08 16:58                         ` Nathan Chancellor
2021-06-08 17:22                           ` Peter Zijlstra
2021-06-08 17:29                             ` Nathan Chancellor
2021-06-08 18:17                               ` Peter Zijlstra
2021-06-08 18:49                                 ` Nathan Chancellor
2021-06-09  7:11                                   ` Lukasz Majczak
2021-06-09  7:20                                     ` Peter Zijlstra
2021-06-09 12:23                                       ` Lukasz Majczak
2021-06-09 15:08                                         ` Peter Zijlstra
2021-06-09 15:11                                           ` Peter Zijlstra
2021-06-09 15:56                                           ` Nathan Chancellor
2021-06-08 18:18                               ` Nick Desaulniers
2021-06-07 18:19                 ` Peter Zijlstra
2021-06-07 18:27                   ` Fāng-ruì Sòng
2021-06-07 18:47                     ` Peter Zijlstra
2021-04-01 15:08   ` [tip: x86/core] objtool/x86: " tip-bot2 for Peter Zijlstra
2021-04-03 11:10   ` tip-bot2 for Peter Zijlstra
2021-03-30 15:02 ` [PATCH v3 00/16] x86,objtool: Optimize !RETPOLINE Miroslav Benes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=161728973897.29796.8946571512222425365.tip-bot2@tip-bot2 \
    --to=tip-bot2@linutronix.de \
    --cc=bp@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).