* [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position
@ 2021-06-01 21:21 Borislav Petkov
  2021-06-02  8:14 ` Peter Zijlstra
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Borislav Petkov @ 2021-06-01 21:21 UTC (permalink / raw)
  To: X86 ML; +Cc: LKML, Richard Narron

From: Borislav Petkov <bp@suse.de>

Up until now, the assumption was that an alternative patching site would
have some instructions at the beginning followed by trailing single-byte
NOP (0x90) padding. Therefore, the patching machinery would optimize
those single-byte NOPs into longer ones.
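
(For reference, the "longer ones" are the recommended x86 multi-byte NOP
encodings, for example:

  66 90                     2-byte: osp nop
  0f 1f 00                  3-byte: nopl (%eax)
  0f 1f 40 00               4-byte: nopl 0x0(%eax)
  0f 1f 44 00 00            5-byte: nopl 0x0(%eax,%eax,1)

so a run of N single-byte NOPs gets rewritten with as few of these as
possible.)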

However, this assumption is broken on 32-bit when code like
hv_do_hypercall() in hyperv_init() uses the retpoline speculation
killer CALL_NOSPEC. The 32-bit version of that macro aligns certain
insns to 16 bytes, leading to the compiler issuing one or more
single-byte NOPs, depending on the holes it needs to fill for
alignment.
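
For illustration, such a site could look like this (a made-up example,
not the actual bytes from the report below):

  ff d3           call *%ebx            # CALL_NOSPEC dispatch
  90              nop                   # alignment filler, mid-stream
  8b 45 08        mov 0x8(%ebp),%eax
  90 90           nop; nop              # trailing padding

i.e., a single-byte NOP can now appear in the middle of the site, not
only at its end.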

That would cause the warning in optimize_nops() to fire:

  ------------[ cut here ]------------
  Not a NOP at 0xc27fb598
   WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13

because that function verifies that all of the bytes following the
first NOP really are single-byte NOPs.

Therefore, carve out the NOP optimization into a separate function and
call it for each NOP range that begins with a single-byte NOP.

Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
Reported-by: Richard Narron <richard@aaazen.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/alternative.c | 62 +++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b5174495..7baf13b11952 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -182,42 +182,68 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
 		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
 }
 
+/*
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+	unsigned long flags;
+	int i = off, nnops;
+
+	while (i < instrlen) {
+		if (instr[i] != 0x90)
+			break;
+
+		i++;
+	}
+
+	nnops = i - off;
+
+	if (nnops <= 1)
+		return nnops;
+
+	local_irq_save(flags);
+	add_nops(instr + off, nnops);
+	local_irq_restore(flags);
+
+	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ",
+		   instr, off, i);
+
+	return nnops;
+}
+
+
 /*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
-	unsigned long flags;
 	struct insn insn;
-	int nop, i = 0;
+	int i = 0;
 
 	/*
-	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
-	 * NOPs, optimize them.
+	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+	 * ones.
 	 */
 	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
+		/*
+		 * See if this and any potentially following NOPs can be
+		 * optimized.
+		 */
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			break;
-
-		if ((i += insn.length) >= a->instrlen)
-			return;
-	}
+			i += optimize_nops_range(instr, a->instrlen, i);
+		else
+			i += insn.length;
 
-	for (nop = i; i < a->instrlen; i++) {
-		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+		if (i >= a->instrlen)
 			return;
 	}
-
-	local_irq_save(flags);
-	add_nops(instr + nop, i - nop);
-	local_irq_restore(flags);
-
-	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, nop, a->instrlen);
 }
 
 /*
-- 
2.29.2
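
For readers who want to play with the logic outside the kernel, below is
a minimal user-space sketch of the scan-and-collapse scheme this patch
introduces. fill_nops() is a simplified stand-in for the kernel's
add_nops() (it only emits 2-byte 66 90 NOPs plus one trailing 0x90), and
the byte-by-byte walk stands in for insn_decode_kernel(); the real code
must use the instruction decoder because a 0x90 byte can also occur
inside a longer instruction.

  #include <stdio.h>

  /* Simplified stand-in for add_nops(): fill a range with 2-byte NOPs
   * (66 90), plus a single 0x90 if the length is odd. */
  static void fill_nops(unsigned char *buf, int len)
  {
          while (len >= 2) {
                  *buf++ = 0x66;
                  *buf++ = 0x90;
                  len -= 2;
          }
          if (len)
                  *buf = 0x90;
  }

  /* Same shape as optimize_nops_range(): collapse the run of 0x90s
   * starting at @off, return the number of NOPs found. */
  static int optimize_nops_range(unsigned char *instr, int instrlen, int off)
  {
          int i = off;

          while (i < instrlen && instr[i] == 0x90)
                  i++;

          if (i - off > 1)
                  fill_nops(instr + off, i - off);

          return i - off;
  }

  int main(void)
  {
          /* Hypothetical site: call *%ebx, one alignment NOP, a mov,
           * then two bytes of trailing padding. */
          unsigned char instr[] = { 0xff, 0xd3, 0x90,
                                    0x8b, 0x45, 0x08, 0x90, 0x90 };
          int instrlen = sizeof(instr), i = 0;

          while (i < instrlen) {
                  if (instr[i] == 0x90)
                          i += optimize_nops_range(instr, instrlen, i);
                  else
                          i++;    /* stand-in for insn.length */
          }

          for (i = 0; i < instrlen; i++)
                  printf("%02x ", instr[i]);
          printf("\n");           /* ff d3 90 8b 45 08 66 90 */
          return 0;
  }

The lone NOP at offset 2 is left alone (nnops == 1), while the trailing
two-NOP run is rewritten as a single 2-byte NOP; the kernel's version
does the same, except add_nops() uses the longest recommended NOP
encodings.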



* Re: [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position
  2021-06-01 21:21 [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position Borislav Petkov
@ 2021-06-02  8:14 ` Peter Zijlstra
  2021-06-02 10:37   ` Borislav Petkov
  2021-06-02 19:49 ` [tip: x86/urgent] " tip-bot2 for Borislav Petkov
  2021-06-03 14:38 ` tip-bot2 for Borislav Petkov
  2 siblings, 1 reply; 5+ messages in thread
From: Peter Zijlstra @ 2021-06-02  8:14 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: X86 ML, LKML, Richard Narron

On Tue, Jun 01, 2021 at 11:21:25PM +0200, Borislav Petkov wrote:
> From: Borislav Petkov <bp@suse.de>
> 
> Up until now, the assumption was that an alternative patching site would
> have some instructions at the beginning followed by trailing single-byte
> NOP (0x90) padding. Therefore, the patching machinery would optimize
> those single-byte NOPs into longer ones.
> 
> However, this assumption is broken on 32-bit when code like
> hv_do_hypercall() in hyperv_init() uses the retpoline speculation
> killer CALL_NOSPEC. The 32-bit version of that macro aligns certain
> insns to 16 bytes, leading to the compiler issuing one or more
> single-byte NOPs, depending on the holes it needs to fill for
> alignment.

Who again insisted that wouldn't happen? :-)

> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 6974b5174495..7baf13b11952 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -182,42 +182,68 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
>  		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
>  }
>  
> +/*
> + * @instr: instruction byte stream
> + * @instrlen: length of the above
> + * @off: offset within @instr where the first NOP has been detected
> + */

That's almost a proper comment, might as well finish it

/*
 * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
 * @instr: instruction byte stream
 * @instrlen: length of the above
 * @off: offset within @instr where the first NOP has been detected
 *
 * Return: number of NOPs found (and replaced)
 */
> +static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
> +{
> +	unsigned long flags;
> +	int i = off, nnops;
> +
> +	while (i < instrlen) {
> +		if (instr[i] != 0x90)
> +			break;
> +
> +		i++;
> +	}

	for (; i < instrlen && instr[i] == 0x90; i++)
		;

perhaps? (possibly too dense, up to you)

> +
> +	nnops = i - off;
> +
> +	if (nnops <= 1)
> +		return nnops;

!nnops would be an error, no?

> +
> +	local_irq_save(flags);
> +	add_nops(instr + off, nnops);
> +	local_irq_restore(flags);
> +
> +	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ",
> +		   instr, off, i);
> +
> +	return nnops;
> +}
> +
> +

Do we really need that extra line?

>  /*
>   * "noinline" to cause control flow change and thus invalidate I$ and
>   * cause refetch after modification.
>   */
>  static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
>  {
>  	struct insn insn;
> +	int i = 0;
>  
>  	/*
> +	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
> +	 * ones.
>  	 */
>  	for (;;) {
>  		if (insn_decode_kernel(&insn, &instr[i]))
>  			return;
>  
> +		/*
> +		 * See if this and any potentially following NOPs can be
> +		 * optimized.
> +		 */
>  		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
> +			i += optimize_nops_range(instr, a->instrlen, i);
> +		else
> +			i += insn.length;
>  
> +		if (i >= a->instrlen)
>  			return;
>  	}
>  }

Anyway, irrespective of nits:

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>


* Re: [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position
  2021-06-02  8:14 ` Peter Zijlstra
@ 2021-06-02 10:37   ` Borislav Petkov
  0 siblings, 0 replies; 5+ messages in thread
From: Borislav Petkov @ 2021-06-02 10:37 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: X86 ML, LKML, Richard Narron

On Wed, Jun 02, 2021 at 10:14:58AM +0200, Peter Zijlstra wrote:
> Who again insisted that wouldn't happen? :-)

Yours truly, ofc. :-P

> That's almost a proper comment, might as well finish it
> 
> /*
>  * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
>  * @instr: instruction byte stream
>  * @instrlen: length of the above
>  * @off: offset within @instr where the first NOP has been detected
>  *
>  * Return: number of NOPs found (and replaced)
>  */

Done.

> 	for (; i < instrlen && instr[i] == 0x90; i++)
> 		;
> 
> perhaps? (possibly too dense, up to you)

Yeah, let's leave it simple so that one can read it at a quick glance.

> > +
> > +	nnops = i - off;
> > +
> > +	if (nnops <= 1)
> > +		return nnops;
> 
> !nnops would be an error, no?

Yeah, just being overly cautious. It should not be 0 since we are
called only when the byte at @off is 0x90. But I have said "should not
be" before and have fallen flat on my face.

> Do we really need that extra line?

Zapped.

> Anyway, irrespective of nits:
> 
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Thanks, all except one incorporated.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette


* [tip: x86/urgent] x86/alternative: Optimize single-byte NOPs at an arbitrary position
  2021-06-01 21:21 [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position Borislav Petkov
  2021-06-02  8:14 ` Peter Zijlstra
@ 2021-06-02 19:49 ` tip-bot2 for Borislav Petkov
  2021-06-03 14:38 ` tip-bot2 for Borislav Petkov
  2 siblings, 0 replies; 5+ messages in thread
From: tip-bot2 for Borislav Petkov @ 2021-06-02 19:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Richard Narron, Borislav Petkov, Peter Zijlstra (Intel),
	x86, linux-kernel

The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     f01775b09efa3b9d1f4a2519678f11d274362da9
Gitweb:        https://git.kernel.org/tip/f01775b09efa3b9d1f4a2519678f11d274362da9
Author:        Borislav Petkov <bp@suse.de>
AuthorDate:    Tue, 01 Jun 2021 17:51:22 +02:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 02 Jun 2021 12:36:07 +02:00

x86/alternative: Optimize single-byte NOPs at an arbitrary position

Up until now, the assumption was that an alternative patching site would
have some instructions at the beginning followed by trailing single-byte
NOP (0x90) padding. Therefore, the patching machinery would optimize
those single-byte NOPs into longer ones.

However, this assumption is broken on 32-bit when code like
hv_do_hypercall() in hyperv_init() uses the retpoline speculation
killer CALL_NOSPEC. The 32-bit version of that macro aligns certain
insns to 16 bytes, leading to the compiler issuing one or more
single-byte NOPs, depending on the holes it needs to fill for
alignment.

That would cause the warning in optimize_nops() to fire:

  ------------[ cut here ]------------
  Not a NOP at 0xc27fb598
   WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13

because that function verifies that all of the bytes following the
first NOP really are single-byte NOPs.

Therefore, carve out the NOP optimization into a separate function and
call it for each NOP range that begins with a single-byte NOP.

Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
Reported-by: Richard Narron <richard@aaazen.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de
---
 arch/x86/kernel/alternative.c | 64 ++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b51..6fe5b44 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -183,41 +183,69 @@ done:
 }
 
 /*
+ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ *
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ *
+ * Return: number of NOPs found (and replaced).
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+	unsigned long flags;
+	int i = off, nnops;
+
+	while (i < instrlen) {
+		if (instr[i] != 0x90)
+			break;
+
+		i++;
+	}
+
+	nnops = i - off;
+
+	if (nnops <= 1)
+		return nnops;
+
+	local_irq_save(flags);
+	add_nops(instr + off, nnops);
+	local_irq_restore(flags);
+
+	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+
+	return nnops;
+}
+
+/*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
-	unsigned long flags;
 	struct insn insn;
-	int nop, i = 0;
+	int i = 0;
 
 	/*
-	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
-	 * NOPs, optimize them.
+	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+	 * ones.
 	 */
 	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
+		/*
+		 * See if this and any potentially following NOPs can be
+		 * optimized.
+		 */
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			break;
-
-		if ((i += insn.length) >= a->instrlen)
-			return;
-	}
+			i += optimize_nops_range(instr, a->instrlen, i);
+		else
+			i += insn.length;
 
-	for (nop = i; i < a->instrlen; i++) {
-		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+		if (i >= a->instrlen)
 			return;
 	}
-
-	local_irq_save(flags);
-	add_nops(instr + nop, i - nop);
-	local_irq_restore(flags);
-
-	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, nop, a->instrlen);
 }
 
 /*


* [tip: x86/urgent] x86/alternative: Optimize single-byte NOPs at an arbitrary position
  2021-06-01 21:21 [PATCH] x86/alternative: Optimize single-byte NOPs at an arbitrary position Borislav Petkov
  2021-06-02  8:14 ` Peter Zijlstra
  2021-06-02 19:49 ` [tip: x86/urgent] " tip-bot2 for Borislav Petkov
@ 2021-06-03 14:38 ` tip-bot2 for Borislav Petkov
  2 siblings, 0 replies; 5+ messages in thread
From: tip-bot2 for Borislav Petkov @ 2021-06-03 14:38 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Richard Narron, Borislav Petkov, Peter Zijlstra (Intel),
	x86, linux-kernel

The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     2b31e8ed96b260ce2c22bd62ecbb9458399e3b62
Gitweb:        https://git.kernel.org/tip/2b31e8ed96b260ce2c22bd62ecbb9458399e3b62
Author:        Borislav Petkov <bp@suse.de>
AuthorDate:    Tue, 01 Jun 2021 17:51:22 +02:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Thu, 03 Jun 2021 16:33:09 +02:00

x86/alternative: Optimize single-byte NOPs at an arbitrary position

Up until now, the assumption was that an alternative patching site would
have some instructions at the beginning followed by trailing single-byte
NOP (0x90) padding. Therefore, the patching machinery would optimize
those single-byte NOPs into longer ones.

However, this assumption is broken on 32-bit when code like
hv_do_hypercall() in hyperv_init() uses the retpoline speculation
killer CALL_NOSPEC. The 32-bit version of that macro aligns certain
insns to 16 bytes, leading to the compiler issuing one or more
single-byte NOPs, depending on the holes it needs to fill for
alignment.

That would cause the warning in optimize_nops() to fire:

  ------------[ cut here ]------------
  Not a NOP at 0xc27fb598
   WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13

because that function verifies that all of the bytes following the
first NOP really are single-byte NOPs.

Therefore, carve out the NOP optimization into a separate function and
call it for each NOP range that begins with a single-byte NOP.

Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
Reported-by: Richard Narron <richard@aaazen.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de
---
 arch/x86/kernel/alternative.c | 64 ++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b51..6fe5b44 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -183,41 +183,69 @@ done:
 }
 
 /*
+ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ *
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ *
+ * Return: number of NOPs found (and replaced).
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+	unsigned long flags;
+	int i = off, nnops;
+
+	while (i < instrlen) {
+		if (instr[i] != 0x90)
+			break;
+
+		i++;
+	}
+
+	nnops = i - off;
+
+	if (nnops <= 1)
+		return nnops;
+
+	local_irq_save(flags);
+	add_nops(instr + off, nnops);
+	local_irq_restore(flags);
+
+	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+
+	return nnops;
+}
+
+/*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
-	unsigned long flags;
 	struct insn insn;
-	int nop, i = 0;
+	int i = 0;
 
 	/*
-	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
-	 * NOPs, optimize them.
+	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+	 * ones.
 	 */
 	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
+		/*
+		 * See if this and any potentially following NOPs can be
+		 * optimized.
+		 */
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			break;
-
-		if ((i += insn.length) >= a->instrlen)
-			return;
-	}
+			i += optimize_nops_range(instr, a->instrlen, i);
+		else
+			i += insn.length;
 
-	for (nop = i; i < a->instrlen; i++) {
-		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+		if (i >= a->instrlen)
 			return;
 	}
-
-	local_irq_save(flags);
-	add_nops(instr + nop, i - nop);
-	local_irq_restore(flags);
-
-	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, nop, a->instrlen);
 }
 
 /*
