From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org, jpoimboe@redhat.com, jgross@suse.com, mbenes@suse.com
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org
Subject: [PATCH v2 02/14] x86/alternatives: Optimize optimize_nops()
Date: Thu, 18 Mar 2021 18:11:05 +0100 [thread overview]
Message-ID: <20210318171919.520075106@infradead.org> (raw)
In-Reply-To: 20210318171103.577093939@infradead.org
Currently optimize_nops() scans to see if the alternative starts with
NOPs. However, the emit pattern is:
141: \oldinstr
142: .skip (len-(142b-141b)), 0x90
That is, when oldinstr is short, we pad the tail with NOPs. This case
never gets optimized.
Rewrite optimize_nops() to replace any string of NOPs inside the
alternative to larger NOPs. Also run it irrespective of patching,
replacing NOPs in both the original and replaced code.
A direct consequence is that padlen becomes superfluous, so remove it.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/alternative.h | 14 ++---
arch/x86/kernel/alternative.c | 61 ++++++++++++++++----------
tools/objtool/arch/x86/include/arch/special.h | 2
3 files changed, 45 insertions(+), 32 deletions(-)
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,7 +65,6 @@ struct alt_instr {
u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction */
- u8 padlen; /* length of build-time padding */
} __packed;
/*
@@ -104,7 +103,6 @@ static inline int alternatives_text_rese
#define alt_end_marker "663"
#define alt_slen "662b-661b"
-#define alt_pad_len alt_end_marker"b-662b"
#define alt_total_slen alt_end_marker"b-661b"
#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
@@ -151,8 +149,7 @@ static inline int alternatives_text_rese
" .long " b_replacement(num)"f - .\n" /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
" .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen(num) "\n" /* replacement len */ \
- " .byte " alt_pad_len "\n" /* pad len */
+ " .byte " alt_rlen(num) "\n" /* replacement len */
#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
"# ALT: replacement " #num "\n" \
@@ -315,13 +312,12 @@ static inline int alternatives_text_rese
* enough information for the alternatives patching code to patch an
* instruction. See apply_alternatives().
*/
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+.macro altinstruction_entry orig alt feature orig_len alt_len
.long \orig - .
.long \alt - .
.word \feature
.byte \orig_len
.byte \alt_len
- .byte \pad_len
.endm
/*
@@ -338,7 +334,7 @@ static inline int alternatives_text_rese
142:
.pushsection .altinstructions,"a"
- altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
.popsection
.pushsection .altinstr_replacement,"ax"
@@ -375,8 +371,8 @@ static inline int alternatives_text_rese
142:
.pushsection .altinstructions,"a"
- altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
- altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
.popsection
.pushsection .altinstr_replacement,"ax"
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -345,19 +345,39 @@ recompute_jump(struct alt_instr *a, u8 *
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
{
unsigned long flags;
- int i;
+ int nops = 0, i = 0;
+ struct insn insn;
+ u8 *nop = NULL;
- for (i = 0; i < a->padlen; i++) {
- if (instr[i] != 0x90)
+ do {
+ if (insn_decode_kernel(&insn, &instr[i]))
return;
- }
- local_irq_save(flags);
- add_nops(instr + (a->instrlen - a->padlen), a->padlen);
- local_irq_restore(flags);
+ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) {
+ if (!nop) {
+ nop = &instr[i];
+ nops = 1;
+ } else {
+ nops++;
+ }
+ }
+ i += insn.length;
+
+ if ((insn.length != 1 || i == a->instrlen) && nop) {
+
+ local_irq_save(flags);
+ add_nops(nop, nops);
+ local_irq_restore(flags);
+
+ DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
+ instr, (int)(unsigned long)(nop-instr), nops);
+
+ nop = NULL;
+ }
- DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
- instr, a->instrlen - a->padlen, a->padlen);
+ } while (i < a->instrlen);
+
+ WARN_ON_ONCE(nop);
}
/*
@@ -403,19 +423,15 @@ void __init_or_module noinline apply_alt
* - feature not present but ALTINSTR_FLAG_INV is set to mean,
* patch if feature is *NOT* present.
*/
- if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
- if (a->padlen > 1)
- optimize_nops(a, instr);
-
- continue;
- }
+ if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
+ goto next;
- DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
+ DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
(a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
feature >> 5,
feature & 0x1f,
instr, instr, a->instrlen,
- replacement, a->replacementlen, a->padlen);
+ replacement, a->replacementlen);
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
@@ -439,14 +455,15 @@ void __init_or_module noinline apply_alt
if (a->replacementlen && is_jmp(replacement[0]))
recompute_jump(a, instr, replacement, insn_buff);
- if (a->instrlen > a->replacementlen) {
- add_nops(insn_buff + a->replacementlen,
- a->instrlen - a->replacementlen);
- insn_buff_sz += a->instrlen - a->replacementlen;
- }
+ for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
+ insn_buff[insn_buff_sz] = 0x90;
+
DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
text_poke_early(instr, insn_buff, insn_buff_sz);
+
+next:
+ optimize_nops(a, instr);
}
}
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -10,7 +10,7 @@
#define JUMP_ORIG_OFFSET 0
#define JUMP_NEW_OFFSET 4
-#define ALT_ENTRY_SIZE 13
+#define ALT_ENTRY_SIZE 12
#define ALT_ORIG_OFFSET 0
#define ALT_NEW_OFFSET 4
#define ALT_FEATURE_OFFSET 8
next prev parent reply other threads:[~2021-03-18 17:24 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-18 17:11 [PATCH v2 00/14] x86,objtool: Optimize !RETPOLINE Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 01/14] x86: Add insn_decode_kernel() Peter Zijlstra
2021-03-19 10:40 ` Borislav Petkov
2021-03-18 17:11 ` Peter Zijlstra [this message]
2021-03-21 12:06 ` [PATCH v2 02/14] x86/alternatives: Optimize optimize_nops() Borislav Petkov
2021-03-22 8:17 ` Peter Zijlstra
2021-03-22 11:07 ` Borislav Petkov
2021-03-18 17:11 ` [PATCH v2 03/14] x86/retpoline: Simplify retpolines Peter Zijlstra
2021-03-19 17:18 ` David Laight
2021-03-22 9:32 ` Peter Zijlstra
2021-03-22 15:41 ` David Laight
2021-03-18 17:11 ` [PATCH v2 04/14] objtool: Correctly handle retpoline thunk calls Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 05/14] objtool: Per arch retpoline naming Peter Zijlstra
2021-03-19 2:38 ` Josh Poimboeuf
2021-03-19 9:07 ` Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 06/14] objtool: Fix static_call list generation Peter Zijlstra
2021-03-22 12:44 ` Miroslav Benes
2021-03-18 17:11 ` [PATCH v2 07/14] objtool: Rework rebuild_reloc logic Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 08/14] objtool: Add elf_create_reloc() helper Peter Zijlstra
2021-03-19 1:42 ` Josh Poimboeuf
2021-03-19 9:47 ` Peter Zijlstra
2021-03-19 15:12 ` Josh Poimboeuf
2021-03-19 15:24 ` Peter Zijlstra
2021-03-19 15:37 ` Josh Poimboeuf
2021-03-18 17:11 ` [PATCH v2 09/14] objtool: Extract elf_strtab_concat() Peter Zijlstra
2021-03-19 2:10 ` Josh Poimboeuf
2021-03-19 9:52 ` Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 10/14] objtool: Extract elf_symbol_add() Peter Zijlstra
2021-03-19 2:14 ` Josh Poimboeuf
2021-03-19 9:54 ` Peter Zijlstra
2021-03-19 15:04 ` Josh Poimboeuf
2021-03-18 17:11 ` [PATCH v2 11/14] objtool: Add elf_create_undef_symbol() Peter Zijlstra
2021-03-19 2:29 ` Josh Poimboeuf
2021-03-19 7:56 ` Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 12/14] objtool: Allow archs to rewrite retpolines Peter Zijlstra
2021-03-19 2:54 ` Josh Poimboeuf
2021-03-19 11:21 ` Peter Zijlstra
2021-03-19 13:28 ` Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 13/14] objtool: Skip magical retpoline .altinstr_replacement Peter Zijlstra
2021-03-18 17:11 ` [PATCH v2 14/14] objtool,x86: Rewrite retpoline thunk calls Peter Zijlstra
2021-03-19 3:29 ` Josh Poimboeuf
2021-03-19 8:06 ` Peter Zijlstra
2021-03-19 15:30 ` Josh Poimboeuf
2021-03-19 15:56 ` Peter Zijlstra
2021-03-19 22:52 ` Josh Poimboeuf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210318171919.520075106@infradead.org \
--to=peterz@infradead.org \
--cc=jgross@suse.com \
--cc=jpoimboe@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mbenes@suse.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).