All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org,
	mhiramat@kernel.org, kirill.shutemov@linux.intel.com,
	Andrew.Cooper3@citrix.com, jpoimboe@redhat.com
Subject: [PATCH v3 3/4] x86/alternative: Rewrite optimize_nops() some
Date: Wed, 08 Feb 2023 18:10:53 +0100	[thread overview]
Message-ID: <20230208171431.373412974@infradead.org> (raw)
In-Reply-To: 20230208171050.490809180@infradead.org

This rewrite address two issues:

 - it no longer hard requires single byte nop runs, it now accepts
   any NOP and NOPL encoded instruction (but not the more complicated
   32bit NOPs).

 - it writes a single 'instruction' replacement.

Specifically, ORC unwinder relies on the tail NOP of an alternative to
be a single instruction, in particular it relies on the inner bytes
not being executed.

Once we reach the max supported NOP length (currently 8, could easily
be extended to 11 on x86_64), switches to JMP.d8 and INT3 padding to
achieve the same result.

The ORC unwinder uses this guarantee in the analysis of
alternative/overlapping CFI state,

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/alternative.c |  103 ++++++++++++++++++++++--------------------
 1 file changed, 54 insertions(+), 49 deletions(-)

--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -126,6 +126,30 @@ static void __init_or_module add_nops(vo
 	}
 }
 
+static void __init_or_module add_nop(u8 *instr, unsigned int len)
+{
+	u8 *target = instr + len;
+
+	if (!len)
+		return;
+
+	if (len <= ASM_NOP_MAX) {
+		memcpy(instr, x86_nops[len], len);
+		return;
+	}
+
+	if (len < 128) {
+		__text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE);
+		instr += JMP8_INSN_SIZE;
+	} else {
+		__text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE);
+		instr += JMP32_INSN_SIZE;
+	}
+
+	for (;instr < target; instr++)
+		*instr = INT3_INSN_OPCODE;
+}
+
 extern s32 __retpoline_sites[], __retpoline_sites_end[];
 extern s32 __return_sites[], __return_sites_end[];
 extern s32 __cfi_sites[], __cfi_sites_end[];
@@ -134,39 +158,32 @@ extern struct alt_instr __alt_instructio
 extern s32 __smp_locks[], __smp_locks_end[];
 void text_poke_early(void *addr, const void *opcode, size_t len);
 
-/*
- * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
- *
- * @instr: instruction byte stream
- * @instrlen: length of the above
- * @off: offset within @instr where the first NOP has been detected
- *
- * Return: number of NOPs found (and replaced).
- */
-static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+static bool insn_is_nop(struct insn *insn)
 {
-	unsigned long flags;
-	int i = off, nnops;
+	if (insn->opcode.bytes[0] == 0x90)
+		return true;
 
-	while (i < instrlen) {
-		if (instr[i] != 0x90)
-			break;
+	if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F)
+		return true;
 
-		i++;
-	}
+	/* TODO: more nops */
 
-	nnops = i - off;
+	return false;
+}
 
-	if (nnops <= 1)
-		return nnops;
+static int skip_nops(u8 *instr, int offset, int len)
+{
+	struct insn insn;
 
-	local_irq_save(flags);
-	add_nops(instr + off, nnops);
-	local_irq_restore(flags);
+	for (; offset < len; offset += insn.length) {
+		if (insn_decode_kernel(&insn, &instr[offset]))
+			break;
 
-	DUMP_BYTES(ALT, instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+		if (!insn_is_nop(&insn))
+			break;
+	}
 
-	return nnops;
+	return offset;
 }
 
 /*
@@ -175,28 +192,19 @@ static __always_inline int optimize_nops
  */
 static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
 {
-	struct insn insn;
-	int i = 0;
+	for (int next, i = 0; i < len; i = next) {
+		struct insn insn;
 
-	/*
-	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
-	 * ones.
-	 */
-	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
-		/*
-		 * See if this and any potentially following NOPs can be
-		 * optimized.
-		 */
-		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			i += optimize_nops_range(instr, len, i);
-		else
-			i += insn.length;
+		next = i + insn.length;
 
-		if (i >= len)
-			return;
+		if (insn_is_nop(&insn)) {
+			next = skip_nops(instr, next, len);
+			add_nop(instr + i, next - i);
+			DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, i, next);
+		}
 	}
 }
 
@@ -317,13 +325,10 @@ apply_relocation(u8 *buf, size_t len, u8
 			}
 		}
 
-
-		/*
-		 * See if this and any potentially following NOPs can be
-		 * optimized.
-		 */
-		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			next = i + optimize_nops_range(buf, len, i);
+		if (insn_is_nop(&insn)) {
+			next = skip_nops(buf, next, len);
+			add_nop(buf + i, next - i);
+		}
 	}
 }
 



  parent reply	other threads:[~2023-02-08 17:18 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-08 17:10 [PATCH v3 0/4] x86: Fully relocatable alternatives and some NOPs Peter Zijlstra
2023-02-08 17:10 ` [PATCH v3 1/4] x86/alternative: Make debug-alternative selective Peter Zijlstra
2023-02-14 11:48   ` Borislav Petkov
2023-05-13 13:03   ` [tip: x86/alternatives] " tip-bot2 for Peter Zijlstra
2023-02-08 17:10 ` [PATCH v3 2/4] x86/alternative: Support relocations in alternatives Peter Zijlstra
2023-02-17 20:28   ` Borislav Petkov
2023-02-17 22:21   ` Borislav Petkov
2023-05-13 13:03   ` [tip: x86/alternatives] " tip-bot2 for Peter Zijlstra
2023-02-08 17:10 ` Peter Zijlstra [this message]
2023-02-08 19:52   ` [PATCH v3 3/4] x86/alternative: Rewrite optimize_nops() some Andrew.Cooper3
2023-02-08 20:29     ` Peter Zijlstra
2023-02-08 20:36       ` Peter Zijlstra
2023-02-08 20:44         ` Peter Zijlstra
2023-02-08 20:45           ` Peter Zijlstra
2023-02-08 21:01           ` Peter Zijlstra
2023-02-08 21:08           ` Peter Zijlstra
2023-02-08 21:21             ` Peter Zijlstra
2023-02-09  1:11               ` Andrew.Cooper3
2023-02-09 22:27                 ` David Laight
2023-02-09  1:33       ` Andrew.Cooper3
2023-02-08 23:04     ` David Laight
2023-05-13 13:03   ` [tip: x86/alternatives] " tip-bot2 for Peter Zijlstra
2023-02-08 17:10 ` [PATCH v3 4/4] x86/alternative: Complicate optimize_nops() some more Peter Zijlstra
2023-05-13 13:03   ` [tip: x86/alternatives] " tip-bot2 for Peter Zijlstra
2023-05-13 16:01     ` [PATCH] x86/alternatives: Fix section mismatch warnings Borislav Petkov
2023-05-13 16:10       ` [tip: x86/alternatives] " tip-bot2 for Borislav Petkov (AMD)
2023-02-27 10:49 ` [PATCH] x86/lib/memmove: Decouple ERMS from FSRM Borislav Petkov
2023-04-27  9:22   ` [PATCH TEST] " Yahu Gao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230208171431.373412974@infradead.org \
    --to=peterz@infradead.org \
    --cc=Andrew.Cooper3@citrix.com \
    --cc=jpoimboe@redhat.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.