* [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position
From: Borislav Petkov @ 2021-06-01 21:21 UTC (permalink / raw)
  To: X86 ML; +Cc: LKML, Richard Narron

From: Borislav Petkov <bp@suse.de>

Up until now, the assumption was that an alternative patching site
would have some instructions at the beginning followed by trailing
single-byte NOP (0x90) padding. Therefore, the patching machinery would
go and optimize those single-byte NOPs into longer ones.
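
For illustration, a minimal sketch (the insns and X86_FEATURE_FOO are
made up; alternative() is the real macro) of how such padding arises:

	/* 2-byte oldinstr, padded at build time with three 0x90 bytes */
	alternative("xor %eax,%eax",	/* 31 c0          */
		    "call new_func",	/* e8 xx xx xx xx */
		    X86_FEATURE_FOO);

The site is padded with single-byte NOPs up to the replacement length,
and optimize_nops() is expected to merge that trailing 0x90 run into
longer NOPs.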

However, this assumption is broken on 32-bit when code like
hv_do_hypercall() in hyperv_init() uses the retpoline speculation
killer CALL_NOSPEC. The 32-bit version of that macro aligns certain
insns to 16 bytes, leading to the compiler issuing one or more
single-byte NOPs, depending on the holes it needs to fill for
alignment.

That causes the warning in optimize_nops() to fire:

  ------------[ cut here ]------------
  Not a NOP at 0xc27fb598
   WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13

because that function verifies that all of the following bytes really
are single-byte NOPs.

Therefore, carve out the NOP-padding optimization into a separate
function and call it for each NOP range beginning with a single-byte
NOP.
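
A hedged byte-level sketch (a made-up stream, not the reported site) of
what the new code handles:

	b8 01 00 00 00		mov  $0x1,%eax
	90 90 90		alignment hole: three single-byte NOPs
	ff d0			call *%eax
	90 90			trailing padding

Each 0x90 run is now handed to optimize_nops_range() and rewritten in
place (e.g. 90 90 90 becomes the 3-byte NOPL 0f 1f 00), instead of
assuming that everything after the first NOP is a NOP.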

Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
Reported-by: Richard Narron <richard@aaazen.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/alternative.c | 62 +++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b5174495..7baf13b11952 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -182,42 +182,68 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
 		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
 }
 
+/*
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+	unsigned long flags;
+	int i = off, nnops;
+
+	while (i < instrlen) {
+		if (instr[i] != 0x90)
+			break;
+
+		i++;
+	}
+
+	nnops = i - off;
+
+	if (nnops <= 1)
+		return nnops;
+
+	local_irq_save(flags);
+	add_nops(instr + off, nnops);
+	local_irq_restore(flags);
+
+	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ",
+		   instr, off, i);
+
+	return nnops;
+}
+
+
 /*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
-	unsigned long flags;
 	struct insn insn;
-	int nop, i = 0;
+	int i = 0;
 
 	/*
-	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
-	 * NOPs, optimize them.
+	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+	 * ones.
 	 */
 	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
+		/*
+		 * See if this and any potentially following NOPs can be
+		 * optimized.
+		 */
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			break;
-
-		if ((i += insn.length) >= a->instrlen)
-			return;
-	}
+			i += optimize_nops_range(instr, a->instrlen, i);
+		else
+			i += insn.length;
 
-	for (nop = i; i < a->instrlen; i++) {
-		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+		if (i >= a->instrlen)
 			return;
 	}
-
-	local_irq_save(flags);
-	add_nops(instr + nop, i - nop);
-	local_irq_restore(flags);
-
-	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, nop, a->instrlen);
 }
 
 /*
-- 
2.29.2



* Re: [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position
From: Peter Zijlstra @ 2021-06-02  8:14 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: X86 ML, LKML, Richard Narron

On Tue, Jun 01, 2021 at 11:21:25PM +0200, Borislav Petkov wrote:
> From: Borislav Petkov <bp@suse.de>
> 
> Up until now, the assumption was that an alternative patching site
> would have some instructions at the beginning followed by trailing
> single-byte NOP (0x90) padding. Therefore, the patching machinery would
> go and optimize those single-byte NOPs into longer ones.
> 
> However, this assumption is broken on 32-bit when code like
> hv_do_hypercall() in hyperv_init() uses the retpoline speculation
> killer CALL_NOSPEC. The 32-bit version of that macro aligns certain
> insns to 16 bytes, leading to the compiler issuing one or more
> single-byte NOPs, depending on the holes it needs to fill for
> alignment.

Who again insisted that wouldn't happen? :-)

> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 6974b5174495..7baf13b11952 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -182,42 +182,68 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
>  		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
>  }
>  
> +/*
> + * @instr: instruction byte stream
> + * @instrlen: length of the above
> + * @off: offset within @instr where the first NOP has been detected
> + */

That's almost a proper comment; might as well finish it:

/*
 * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
 * @instr: instruction byte stream
 * @instrlen: length of the above
 * @off: offset within @instr where the first NOP has been detected
 *
 * Return: number of NOPs found (and replaced)
 */
> +static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
> +{
> +	unsigned long flags;
> +	int i = off, nnops;
> +
> +	while (i < instrlen) {
> +		if (instr[i] != 0x90)
> +			break;
> +
> +		i++;
> +	}

	for (; i < instrlen && instr[i] == 0x90; i++)
		;

perhaps? (possibly too dense, up to you)

> +
> +	nnops = i - off;
> +
> +	if (nnops <= 1)
> +		return nnops;

!nnops would be an error, no?

> +
> +	local_irq_save(flags);
> +	add_nops(instr + off, nnops);
> +	local_irq_restore(flags);
> +
> +	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ",
> +		   instr, off, i);
> +
> +	return nnops;
> +}
> +
> +

Do we really need that extra line?

>  /*
>   * "noinline" to cause control flow change and thus invalidate I$ and
>   * cause refetch after modification.
>   */
>  static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
>  {
>  	struct insn insn;
> +	int i = 0;
>  
>  	/*
> +	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
> +	 * ones.
>  	 */
>  	for (;;) {
>  		if (insn_decode_kernel(&insn, &instr[i]))
>  			return;
>  
> +		/*
> +		 * See if this and any potentially following NOPs can be
> +		 * optimized.
> +		 */
>  		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
> +			i += optimize_nops_range(instr, a->instrlen, i);
> +		else
> +			i += insn.length;
>  
> +		if (i >= a->instrlen)
>  			return;
>  	}
>  }

Anyway, irrespective of nits:

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>


* Re: [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position
From: Borislav Petkov @ 2021-06-02 10:37 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: X86 ML, LKML, Richard Narron

On Wed, Jun 02, 2021 at 10:14:58AM +0200, Peter Zijlstra wrote:
> Who again insisted that wouldn't happen? :-)

Yours truly, ofc. :-P

> That's almost a proper comment, might as well finish it
> 
> /*
>  * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
>  * @instr: instruction byte stream
>  * @instrlen: length of the above
>  * @off: offset within @instr where the first NOP has been detected
>  *
>  * Return: number of NOPs found (and replaced)
>  */

Done.

> 	for (; i < instrlen && instr[i] == 0x90; i++)
> 		;
> 
> perhaps? (possibly too dense, up to you)

Yeah, let's leave it simple so that one can read it at a quick glance.

> > +
> > +	nnops = i - off;
> > +
> > +	if (nnops <= 1)
> > +		return nnops;
> 
> !nnops would be an error, no?

Yeah, just being overly cautious. It should not be 0 since we are only
called when the byte at @off is 0x90. But I have said "should not be"
before and have fallen flat on my face.

> Do we really need that extra line?

Zapped.

> Anyway, irrespective of nits:
> 
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Thanks, all except one incorporated.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette


* [tip: x86/urgent] x86/alternative: Optimize single-byte NOPs at an arbitrary position
From: tip-bot2 for Borislav Petkov @ 2021-06-02 19:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Richard Narron, Borislav Petkov, Peter Zijlstra (Intel),
	x86, linux-kernel

The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     f01775b09efa3b9d1f4a2519678f11d274362da9
Gitweb:        https://git.kernel.org/tip/f01775b09efa3b9d1f4a2519678f11d274362da9
Author:        Borislav Petkov <bp@suse.de>
AuthorDate:    Tue, 01 Jun 2021 17:51:22 +02:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 02 Jun 2021 12:36:07 +02:00

x86/alternative: Optimize single-byte NOPs at an arbitrary position

Up until now, the assumption was that an alternative patching site
would have some instructions at the beginning followed by trailing
single-byte NOP (0x90) padding. Therefore, the patching machinery would
go and optimize those single-byte NOPs into longer ones.

However, this assumption is broken on 32-bit when code like
hv_do_hypercall() in hyperv_init() uses the retpoline speculation
killer CALL_NOSPEC. The 32-bit version of that macro aligns certain
insns to 16 bytes, leading to the compiler issuing one or more
single-byte NOPs, depending on the holes it needs to fill for
alignment.

That causes the warning in optimize_nops() to fire:

  ------------[ cut here ]------------
  Not a NOP at 0xc27fb598
   WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13

because that function verifies that all of the following bytes really
are single-byte NOPs.

Therefore, carve out the NOP-padding optimization into a separate
function and call it for each NOP range beginning with a single-byte
NOP.

Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
Reported-by: Richard Narron <richard@aaazen.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de
---
 arch/x86/kernel/alternative.c | 64 ++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b51..6fe5b44 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -183,41 +183,69 @@ done:
 }
 
 /*
+ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ *
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ *
+ * Return: number of NOPs found (and replaced).
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+	unsigned long flags;
+	int i = off, nnops;
+
+	while (i < instrlen) {
+		if (instr[i] != 0x90)
+			break;
+
+		i++;
+	}
+
+	nnops = i - off;
+
+	if (nnops <= 1)
+		return nnops;
+
+	local_irq_save(flags);
+	add_nops(instr + off, nnops);
+	local_irq_restore(flags);
+
+	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+
+	return nnops;
+}
+
+/*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
-	unsigned long flags;
 	struct insn insn;
-	int nop, i = 0;
+	int i = 0;
 
 	/*
-	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
-	 * NOPs, optimize them.
+	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+	 * ones.
 	 */
 	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
+		/*
+		 * See if this and any potentially following NOPs can be
+		 * optimized.
+		 */
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			break;
-
-		if ((i += insn.length) >= a->instrlen)
-			return;
-	}
+			i += optimize_nops_range(instr, a->instrlen, i);
+		else
+			i += insn.length;
 
-	for (nop = i; i < a->instrlen; i++) {
-		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+		if (i >= a->instrlen)
 			return;
 	}
-
-	local_irq_save(flags);
-	add_nops(instr + nop, i - nop);
-	local_irq_restore(flags);
-
-	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, nop, a->instrlen);
 }
 
 /*


* [tip: x86/urgent] x86/alternative: Optimize single-byte NOPs at an arbitrary position
From: tip-bot2 for Borislav Petkov @ 2021-06-03 14:38 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Richard Narron, Borislav Petkov, Peter Zijlstra (Intel),
	x86, linux-kernel

The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     2b31e8ed96b260ce2c22bd62ecbb9458399e3b62
Gitweb:        https://git.kernel.org/tip/2b31e8ed96b260ce2c22bd62ecbb9458399e3b62
Author:        Borislav Petkov <bp@suse.de>
AuthorDate:    Tue, 01 Jun 2021 17:51:22 +02:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Thu, 03 Jun 2021 16:33:09 +02:00

x86/alternative: Optimize single-byte NOPs at an arbitrary position

Up until now, the assumption was that an alternative patching site
would have some instructions at the beginning followed by trailing
single-byte NOP (0x90) padding. Therefore, the patching machinery would
go and optimize those single-byte NOPs into longer ones.

However, this assumption is broken on 32-bit when code like
hv_do_hypercall() in hyperv_init() uses the retpoline speculation
killer CALL_NOSPEC. The 32-bit version of that macro aligns certain
insns to 16 bytes, leading to the compiler issuing one or more
single-byte NOPs, depending on the holes it needs to fill for
alignment.

That causes the warning in optimize_nops() to fire:

  ------------[ cut here ]------------
  Not a NOP at 0xc27fb598
   WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13

because that function verifies that all of the following bytes really
are single-byte NOPs.

Therefore, carve out the NOP-padding optimization into a separate
function and call it for each NOP range beginning with a single-byte
NOP.

Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
Reported-by: Richard Narron <richard@aaazen.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de
---
 arch/x86/kernel/alternative.c | 64 ++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b51..6fe5b44 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -183,41 +183,69 @@ done:
 }
 
 /*
+ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ *
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ *
+ * Return: number of NOPs found (and replaced).
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+	unsigned long flags;
+	int i = off, nnops;
+
+	while (i < instrlen) {
+		if (instr[i] != 0x90)
+			break;
+
+		i++;
+	}
+
+	nnops = i - off;
+
+	if (nnops <= 1)
+		return nnops;
+
+	local_irq_save(flags);
+	add_nops(instr + off, nnops);
+	local_irq_restore(flags);
+
+	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+
+	return nnops;
+}
+
+/*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
-	unsigned long flags;
 	struct insn insn;
-	int nop, i = 0;
+	int i = 0;
 
 	/*
-	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
-	 * NOPs, optimize them.
+	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+	 * ones.
 	 */
 	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
+		/*
+		 * See if this and any potentially following NOPs can be
+		 * optimized.
+		 */
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			break;
-
-		if ((i += insn.length) >= a->instrlen)
-			return;
-	}
+			i += optimize_nops_range(instr, a->instrlen, i);
+		else
+			i += insn.length;
 
-	for (nop = i; i < a->instrlen; i++) {
-		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+		if (i >= a->instrlen)
 			return;
 	}
-
-	local_irq_save(flags);
-	add_nops(instr + nop, i - nop);
-	local_irq_restore(flags);
-
-	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, nop, a->instrlen);
 }
 
 /*

