linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] x86/alternatives: More nops and some shrinkage
@ 2023-05-15  9:28 Peter Zijlstra
  2023-05-15  9:28 ` [PATCH 1/2] x86_64: Longer NOPs Peter Zijlstra
       [not found] ` <20230515093020.729622326@infradead.org>
  0 siblings, 2 replies; 6+ messages in thread
From: Peter Zijlstra @ 2023-05-15  9:28 UTC (permalink / raw)
  To: x86; +Cc: linux-kernel, peterz, mhiramat, Andrew.Cooper3, jpoimboe

Hi Boris,

I went through my pile after seeing the x86/alternatives tip-bot messages and
found these two stragglers hadn't yet made it out.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] x86_64: Longer NOPs
  2023-05-15  9:28 [PATCH 0/2] x86/alternatives: More nops and some shrinkage Peter Zijlstra
@ 2023-05-15  9:28 ` Peter Zijlstra
  2023-05-31 11:57   ` [tip: x86/alternatives] x86/alternatives: Add longer 64-bit NOPs tip-bot2 for Peter Zijlstra
       [not found] ` <20230515093020.729622326@infradead.org>
  1 sibling, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2023-05-15  9:28 UTC (permalink / raw)
  To: x86; +Cc: linux-kernel, peterz, mhiramat, Andrew.Cooper3, jpoimboe

By adding support for longer NOPs there are a few more alternatives
that can turn into a single instruction.

Add up to NOP11, the same limit where GNU as .nops also stops
generating longer nops. This is because a number of uarchs have severe
decode penalties for more than 3 prefixes.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/nops.h   |   16 ++++++++++++++--
 arch/x86/kernel/alternative.c |   10 ++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -34,6 +34,8 @@
 #define BYTES_NOP7	0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
 #define BYTES_NOP8	0x3e,BYTES_NOP7
 
+#define ASM_NOP_MAX 8
+
 #else
 
 /*
@@ -47,6 +49,9 @@
  * 6: osp nopl 0x00(%eax,%eax,1)
  * 7: nopl 0x00000000(%eax)
  * 8: nopl 0x00000000(%eax,%eax,1)
+ * 9: cs nopl 0x00000000(%eax,%eax,1)
+ * 10: osp cs nopl 0x00000000(%eax,%eax,1)
+ * 11: osp osp cs nopl 0x00000000(%eax,%eax,1)
  */
 #define BYTES_NOP1	0x90
 #define BYTES_NOP2	0x66,BYTES_NOP1
@@ -56,6 +61,15 @@
 #define BYTES_NOP6	0x66,BYTES_NOP5
 #define BYTES_NOP7	0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
 #define BYTES_NOP8	0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+#define BYTES_NOP9	0x2e,BYTES_NOP8
+#define BYTES_NOP10	0x66,BYTES_NOP9
+#define BYTES_NOP11	0x66,BYTES_NOP10
+
+#define ASM_NOP9  _ASM_BYTES(BYTES_NOP9)
+#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10)
+#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11)
+
+#define ASM_NOP_MAX 11
 
 #endif /* CONFIG_64BIT */
 
@@ -68,8 +82,6 @@
 #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
 #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
 
-#define ASM_NOP_MAX 8
-
 #ifndef __ASSEMBLY__
 extern const unsigned char * const x86_nops[];
 #endif
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -98,6 +98,11 @@ static const unsigned char x86nops[] =
 	BYTES_NOP6,
 	BYTES_NOP7,
 	BYTES_NOP8,
+#ifdef CONFIG_64BIT
+	BYTES_NOP9,
+	BYTES_NOP10,
+	BYTES_NOP11,
+#endif
 };
 
 const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
@@ -111,6 +116,11 @@ const unsigned char * const x86_nops[ASM
 	x86nops + 1 + 2 + 3 + 4 + 5,
 	x86nops + 1 + 2 + 3 + 4 + 5 + 6,
 	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+#ifdef CONFIG_64BIT
+	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
+	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9,
+	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10,
+#endif
 };
 
 /*



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] x86: Shorten RESET_CALL_DEPTH
       [not found] ` <20230515093020.729622326@infradead.org>
@ 2023-05-15  9:47   ` Andrew.Cooper3
  2023-05-15 10:26     ` Peter Zijlstra
  2023-05-31 11:57   ` [tip: x86/alternatives] x86/nospec: " tip-bot2 for Peter Zijlstra
  1 sibling, 1 reply; 6+ messages in thread
From: Andrew.Cooper3 @ 2023-05-15  9:47 UTC (permalink / raw)
  To: Peter Zijlstra, x86; +Cc: linux-kernel, mhiramat, jpoimboe

On 15/05/2023 10:28 am, Peter Zijlstra wrote:
> RESET_CALL_DEPTH is a pretty fat monster and blows up UNTRAIN_RET to
> 20 bytes:
>
>   19:       48 c7 c0 80 00 00 00    mov    $0x80,%rax
>   20:       48 c1 e0 38             shl    $0x38,%rax
>   24:       65 48 89 04 25 00 00 00 00      mov    %rax,%gs:0x0     29: R_X86_64_32S        pcpu_hot+0x10
>
> Shrink it by 4 bytes:
>
>   0:   31 c0                   xor    %eax,%eax
>   2:   48 0f ba e8 3f          bts    $0x3f,%rax
>   7:   65 48 89 04 25 00 00 00 00      mov    %rax,%gs:0x0
>
> Shrink RESET_CALL_DEPTH_FROM_CALL by 5 bytes by only setting al, the
> other bits are shifted out (the same could be done for
> RESET_CALL_DEPTH, but the xor+bts sequence has less dependencies due to
> the zeroing).
>
> Suggested-by: Andrew.Cooper3@citrix.com

Andrew Cooper <andrew.cooper3@citrix.com>

> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/include/asm/nospec-branch.h |    6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -84,12 +84,12 @@
>  	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
>  
>  #define RESET_CALL_DEPTH					\
> -	mov	$0x80, %rax;					\
> -	shl	$56, %rax;					\
> +	xor	%eax, %eax;					\
> +	bts	$59, %rax;					\

$63 ?

The disassembly looks correct.

~Andrew

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] x86: Shorten RESET_CALL_DEPTH
  2023-05-15  9:47   ` [PATCH 2/2] x86: Shorten RESET_CALL_DEPTH Andrew.Cooper3
@ 2023-05-15 10:26     ` Peter Zijlstra
  0 siblings, 0 replies; 6+ messages in thread
From: Peter Zijlstra @ 2023-05-15 10:26 UTC (permalink / raw)
  To: Andrew.Cooper3; +Cc: x86, linux-kernel, mhiramat, jpoimboe

On Mon, May 15, 2023 at 10:47:42AM +0100, Andrew.Cooper3@citrix.com wrote:
> On 15/05/2023 10:28 am, Peter Zijlstra wrote:

> > Shrink it by 4 bytes:
> >
> >   0:   31 c0                   xor    %eax,%eax
> >   2:   48 0f ba e8 3f          bts    $0x3f,%rax
> >   7:   65 48 89 04 25 00 00 00 00      mov    %rax,%gs:0x0

> >  #define RESET_CALL_DEPTH					\
> > -	mov	$0x80, %rax;					\
> > -	shl	$56, %rax;					\
> > +	xor	%eax, %eax;					\
> > +	bts	$59, %rax;					\
> 
> $63 ?
> 
> The disassembly looks correct.

Yeah, uhmm, clearly I fixed it somewhere but not on the version I sent
out :-(

Too bad we need the REX.W prefix...

---
Subject: x86: Shorten RESET_CALL_DEPTH
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri Feb 10 10:10:57 CET 2023

RESET_CALL_DEPTH is a pretty fat monster and blows up UNTRAIN_RET to
20 bytes:

  19:       48 c7 c0 80 00 00 00    mov    $0x80,%rax
  20:       48 c1 e0 38             shl    $0x38,%rax
  24:       65 48 89 04 25 00 00 00 00      mov    %rax,%gs:0x0     29: R_X86_64_32S        pcpu_hot+0x10

Shrink it by 4 bytes:

  0:   31 c0                   xor    %eax,%eax
  2:   48 0f ba e8 3f          bts    $0x3f,%rax
  7:   65 48 89 04 25 00 00 00 00      mov    %rax,%gs:0x0

Shrink RESET_CALL_DEPTH_FROM_CALL by 5 bytes by only setting al, the
other bits are shifted out (the same could be done for
RESET_CALL_DEPTH, but the xor+bts sequence has less dependencies due to
the zeroing).

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/nospec-branch.h |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -84,12 +84,12 @@
 	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 
 #define RESET_CALL_DEPTH					\
-	mov	$0x80, %rax;					\
-	shl	$56, %rax;					\
+	xor	%eax, %eax;					\
+	bts	$63, %rax;					\
 	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 
 #define RESET_CALL_DEPTH_FROM_CALL				\
-	mov	$0xfc, %rax;					\
+	movb	$0xfc, %al;					\
 	shl	$56, %rax;					\
 	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
 	CALL_THUNKS_DEBUG_INC_CALLS

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip: x86/alternatives] x86/nospec: Shorten RESET_CALL_DEPTH
       [not found] ` <20230515093020.729622326@infradead.org>
  2023-05-15  9:47   ` [PATCH 2/2] x86: Shorten RESET_CALL_DEPTH Andrew.Cooper3
@ 2023-05-31 11:57   ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 6+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-05-31 11:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Andrew Cooper, Peter Zijlstra (Intel), Borislav Petkov (AMD),
	x86, linux-kernel

The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID:     3496d1c64a0fcc9bae3ed40decc3ecd7f8ac072f
Gitweb:        https://git.kernel.org/tip/3496d1c64a0fcc9bae3ed40decc3ecd7f8ac072f
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 10 Feb 2023 10:10:57 +01:00
Committer:     Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Wed, 31 May 2023 13:40:57 +02:00

x86/nospec: Shorten RESET_CALL_DEPTH

RESET_CALL_DEPTH is a pretty fat monster and blows up UNTRAIN_RET to
20 bytes:

  19:       48 c7 c0 80 00 00 00    mov    $0x80,%rax
  20:       48 c1 e0 38             shl    $0x38,%rax
  24:       65 48 89 04 25 00 00 00 00      mov    %rax,%gs:0x0     29: R_X86_64_32S        pcpu_hot+0x10

Shrink it by 4 bytes:

  0:   31 c0				xor %eax,%eax
  2:   48 0f ba e8 3f			bts $0x3f,%rax
  7:   65 48 89 04 25 00 00 00 00	mov %rax,%gs:0x0

Shrink RESET_CALL_DEPTH_FROM_CALL by 5 bytes by only setting %al, the
other bits are shifted out (the same could be done for RESET_CALL_DEPTH,
but the XOR+BTS sequence has less dependencies due to the zeroing).

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230515093020.729622326@infradead.org
---
 arch/x86/include/asm/nospec-branch.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index edb2b0c..55388c9 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -84,12 +84,12 @@
 	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 
 #define RESET_CALL_DEPTH					\
-	mov	$0x80, %rax;					\
-	shl	$56, %rax;					\
+	xor	%eax, %eax;					\
+	bts	$63, %rax;					\
 	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 
 #define RESET_CALL_DEPTH_FROM_CALL				\
-	mov	$0xfc, %rax;					\
+	movb	$0xfc, %al;					\
 	shl	$56, %rax;					\
 	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
 	CALL_THUNKS_DEBUG_INC_CALLS

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [tip: x86/alternatives] x86/alternatives: Add longer 64-bit NOPs
  2023-05-15  9:28 ` [PATCH 1/2] x86_64: Longer NOPs Peter Zijlstra
@ 2023-05-31 11:57   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 6+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-05-31 11:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel), Borislav Petkov (AMD), x86, linux-kernel

The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID:     df25edbac31ea87b488789d44a362063542b5967
Gitweb:        https://git.kernel.org/tip/df25edbac31ea87b488789d44a362063542b5967
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Mon, 15 May 2023 11:28:05 +02:00
Committer:     Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Wed, 31 May 2023 10:21:21 +02:00

x86/alternatives: Add longer 64-bit NOPs

By adding support for longer NOPs there are a few more alternatives
that can turn into a single instruction.

Add up to NOP11, the same limit where GNU as .nops also stops
generating longer nops. This is because a number of uarchs have severe
decode penalties for more than 3 prefixes.

  [ bp: Sync up with the version in tools/ while at it. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230515093020.661756940@infradead.org
---
 arch/x86/include/asm/nops.h       | 16 ++++++++++++++--
 arch/x86/kernel/alternative.c     | 10 ++++++++++
 tools/arch/x86/include/asm/nops.h | 16 ++++++++++++++--
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index c5573ea..1c1b755 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -34,6 +34,8 @@
 #define BYTES_NOP7	0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
 #define BYTES_NOP8	0x3e,BYTES_NOP7
 
+#define ASM_NOP_MAX 8
+
 #else
 
 /*
@@ -47,6 +49,9 @@
  * 6: osp nopl 0x00(%eax,%eax,1)
  * 7: nopl 0x00000000(%eax)
  * 8: nopl 0x00000000(%eax,%eax,1)
+ * 9: cs nopl 0x00000000(%eax,%eax,1)
+ * 10: osp cs nopl 0x00000000(%eax,%eax,1)
+ * 11: osp osp cs nopl 0x00000000(%eax,%eax,1)
  */
 #define BYTES_NOP1	0x90
 #define BYTES_NOP2	0x66,BYTES_NOP1
@@ -56,6 +61,15 @@
 #define BYTES_NOP6	0x66,BYTES_NOP5
 #define BYTES_NOP7	0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
 #define BYTES_NOP8	0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+#define BYTES_NOP9	0x2e,BYTES_NOP8
+#define BYTES_NOP10	0x66,BYTES_NOP9
+#define BYTES_NOP11	0x66,BYTES_NOP10
+
+#define ASM_NOP9  _ASM_BYTES(BYTES_NOP9)
+#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10)
+#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11)
+
+#define ASM_NOP_MAX 11
 
 #endif /* CONFIG_64BIT */
 
@@ -68,8 +82,6 @@
 #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
 #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
 
-#define ASM_NOP_MAX 8
-
 #ifndef __ASSEMBLY__
 extern const unsigned char * const x86_nops[];
 #endif
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 93aa95a..0747d29 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -98,6 +98,11 @@ static const unsigned char x86nops[] =
 	BYTES_NOP6,
 	BYTES_NOP7,
 	BYTES_NOP8,
+#ifdef CONFIG_64BIT
+	BYTES_NOP9,
+	BYTES_NOP10,
+	BYTES_NOP11,
+#endif
 };
 
 const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
@@ -111,6 +116,11 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
 	x86nops + 1 + 2 + 3 + 4 + 5,
 	x86nops + 1 + 2 + 3 + 4 + 5 + 6,
 	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+#ifdef CONFIG_64BIT
+	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
+	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9,
+	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10,
+#endif
 };
 
 /*
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index c5573ea..1c1b755 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -34,6 +34,8 @@
 #define BYTES_NOP7	0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
 #define BYTES_NOP8	0x3e,BYTES_NOP7
 
+#define ASM_NOP_MAX 8
+
 #else
 
 /*
@@ -47,6 +49,9 @@
  * 6: osp nopl 0x00(%eax,%eax,1)
  * 7: nopl 0x00000000(%eax)
  * 8: nopl 0x00000000(%eax,%eax,1)
+ * 9: cs nopl 0x00000000(%eax,%eax,1)
+ * 10: osp cs nopl 0x00000000(%eax,%eax,1)
+ * 11: osp osp cs nopl 0x00000000(%eax,%eax,1)
  */
 #define BYTES_NOP1	0x90
 #define BYTES_NOP2	0x66,BYTES_NOP1
@@ -56,6 +61,15 @@
 #define BYTES_NOP6	0x66,BYTES_NOP5
 #define BYTES_NOP7	0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
 #define BYTES_NOP8	0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+#define BYTES_NOP9	0x2e,BYTES_NOP8
+#define BYTES_NOP10	0x66,BYTES_NOP9
+#define BYTES_NOP11	0x66,BYTES_NOP10
+
+#define ASM_NOP9  _ASM_BYTES(BYTES_NOP9)
+#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10)
+#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11)
+
+#define ASM_NOP_MAX 11
 
 #endif /* CONFIG_64BIT */
 
@@ -68,8 +82,6 @@
 #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
 #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
 
-#define ASM_NOP_MAX 8
-
 #ifndef __ASSEMBLY__
 extern const unsigned char * const x86_nops[];
 #endif

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-05-31 11:57 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-15  9:28 [PATCH 0/2] x86/alternatives: More nops and some shrinkage Peter Zijlstra
2023-05-15  9:28 ` [PATCH 1/2] x86_64: Longer NOPs Peter Zijlstra
2023-05-31 11:57   ` [tip: x86/alternatives] x86/alternatives: Add longer 64-bit NOPs tip-bot2 for Peter Zijlstra
     [not found] ` <20230515093020.729622326@infradead.org>
2023-05-15  9:47   ` [PATCH 2/2] x86: Shorten RESET_CALL_DEPTH Andrew.Cooper3
2023-05-15 10:26     ` Peter Zijlstra
2023-05-31 11:57   ` [tip: x86/alternatives] x86/nospec: " tip-bot2 for Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).