* [PATCH V6 1/8] x86/traps: Move pt_regs only in fixup_bad_iret()
2022-04-21 14:10 [PATCH V6 0/8] x86/entry: Clean up entry code Lai Jiangshan
@ 2022-04-21 14:10 ` Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 2/8] x86/entry: Switch the stack after error_entry() returns Lai Jiangshan
` (6 subsequent siblings)
7 siblings, 0 replies; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-21 14:10 UTC (permalink / raw)
To: linux-kernel
Cc: Borislav Petkov, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin, Kirill A. Shutemov, Fenghua Yu,
Chang S. Bae
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Always stash the address error_entry() is going to return to, in %r12
and get rid of the void *error_entry_ret; slot in struct bad_iret_stack
which was supposed to account for it and pt_regs pushed on the stack.
After this, both fixup_bad_iret() and sync_regs() can work on a
struct pt_regs pointer directly.
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
arch/x86/entry/entry_64.S | 5 ++++-
arch/x86/include/asm/traps.h | 2 +-
arch/x86/kernel/traps.c | 18 ++++++------------
3 files changed, 11 insertions(+), 14 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73d958522b6a..ecbfca3cc18c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1061,9 +1061,12 @@ SYM_CODE_START_LOCAL(error_entry)
* Pretend that the exception came from user mode: set up pt_regs
* as if we faulted immediately after IRET.
*/
- mov %rsp, %rdi
+ popq %r12 /* save return addr in %12 */
+ movq %rsp, %rdi /* arg0 = pt_regs pointer */
call fixup_bad_iret
mov %rax, %rsp
+ ENCODE_FRAME_POINTER
+ pushq %r12
jmp .Lerror_entry_from_usermode_after_swapgs
SYM_CODE_END(error_entry)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 35317c5c551d..47ecfff2c83d 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,7 +13,7 @@
#ifdef CONFIG_X86_64
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
+struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
void __init trap_init(void);
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a4e2efde5d1f..111b18d57a54 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -898,13 +898,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
}
#endif
-struct bad_iret_stack {
- void *error_entry_ret;
- struct pt_regs regs;
-};
-
-asmlinkage __visible noinstr
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
{
/*
* This is called from entry_64.S early in handling a fault
@@ -914,19 +908,19 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
* just below the IRET frame) and we want to pretend that the
* exception came from the IRET target.
*/
- struct bad_iret_stack tmp, *new_stack =
- (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+ struct pt_regs tmp, *new_stack =
+ (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
/* Copy the IRET target to the temporary storage. */
- __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
+ __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8);
/* Copy the remainder of the stack from the current stack. */
- __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
+ __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip));
/* Update the entry stack */
__memcpy(new_stack, &tmp, sizeof(tmp));
- BUG_ON(!user_mode(&new_stack->regs));
+ BUG_ON(!user_mode(new_stack));
return new_stack;
}
#endif
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH V6 2/8] x86/entry: Switch the stack after error_entry() returns
2022-04-21 14:10 [PATCH V6 0/8] x86/entry: Clean up entry code Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 1/8] x86/traps: Move pt_regs only in fixup_bad_iret() Lai Jiangshan
@ 2022-04-21 14:10 ` Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry() Lai Jiangshan
` (5 subsequent siblings)
7 siblings, 0 replies; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-21 14:10 UTC (permalink / raw)
To: linux-kernel
Cc: Borislav Petkov, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
error_entry() calls sync_regs(), and fixup_bad_iret() before sync_regs()
if it is a fault from bad IRET, to copy the pt_regs to the kernel stack
and switches the kernel stack directly after sync_regs().
But error_entry() itself is also a function call, so the code has to
stash the address error_entry() is going to return to, in %r12 and
makes the work complicated.
Move the code of switching stack after error_entry() and get rid of the
need to handle the return address.
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
arch/x86/entry/entry_64.S | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ecbfca3cc18c..ca3e99e08a44 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -326,6 +326,8 @@ SYM_CODE_END(ret_from_fork)
.macro idtentry_body cfunc has_error_code:req
call error_entry
+ movq %rax, %rsp /* switch to the task stack if from userspace */
+ ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
movq %rsp, %rdi /* pt_regs pointer into 1st argument*/
@@ -1002,14 +1004,10 @@ SYM_CODE_START_LOCAL(error_entry)
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
- popq %r12 /* save return addr in %12 */
- movq %rsp, %rdi /* arg0 = pt_regs pointer */
call sync_regs
- movq %rax, %rsp /* switch stack */
- ENCODE_FRAME_POINTER
- pushq %r12
RET
/*
@@ -1041,6 +1039,7 @@ SYM_CODE_START_LOCAL(error_entry)
*/
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
+ leaq 8(%rsp), %rax /* return pt_regs pointer */
RET
.Lbstep_iret:
@@ -1061,12 +1060,9 @@ SYM_CODE_START_LOCAL(error_entry)
* Pretend that the exception came from user mode: set up pt_regs
* as if we faulted immediately after IRET.
*/
- popq %r12 /* save return addr in %12 */
- movq %rsp, %rdi /* arg0 = pt_regs pointer */
+ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
call fixup_bad_iret
- mov %rax, %rsp
- ENCODE_FRAME_POINTER
- pushq %r12
+ mov %rax, %rdi
jmp .Lerror_entry_from_usermode_after_swapgs
SYM_CODE_END(error_entry)
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()
2022-04-21 14:10 [PATCH V6 0/8] x86/entry: Clean up entry code Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 1/8] x86/traps: Move pt_regs only in fixup_bad_iret() Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 2/8] x86/entry: Switch the stack after error_entry() returns Lai Jiangshan
@ 2022-04-21 14:10 ` Lai Jiangshan
2022-04-27 17:45 ` Borislav Petkov
2022-04-21 14:10 ` [PATCH V6 4/8] x86/entry: Move cld to the start of idtentry macro Lai Jiangshan
` (4 subsequent siblings)
7 siblings, 1 reply; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-21 14:10 UTC (permalink / raw)
To: linux-kernel
Cc: Borislav Petkov, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
The macro idtentry calls error_entry() unconditionally even on XENPV.
But the code XENPV needs in error_entry() is PUSH_AND_CLEAR_REGS only.
And error_entry() also calls sync_regs() which has to deal with the
case of XENPV via an extra branch so that it doesn't copy the pt_regs.
And PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to
its original place when the function returns, which means it is not
possible to convert it to a C function.
Move PUSH_AND_CLEAR_REGS out of error_entry(), add a function to wrap
PUSH_AND_CLEAR_REGS and call it before error_entry().
The new function call adds two instructions (CALL and RET) for every
interrupt or exception. It will allow for error_entry() to be not
called on XENPV which allows for sync_regs() to reduce a branch. It
will also allow for error_entry() to be converted to C code that can
use inlined sync_regs() and save a function call again.
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
arch/x86/entry/entry_64.S | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ca3e99e08a44..b1cef3b0a7ab 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -318,6 +318,14 @@ SYM_CODE_END(ret_from_fork)
#endif
.endm
+/* Save all registers in pt_regs */
+SYM_CODE_START_LOCAL(push_and_clear_regs)
+ UNWIND_HINT_FUNC
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
+ RET
+SYM_CODE_END(push_and_clear_regs)
+
/**
* idtentry_body - Macro to emit code calling the C function
* @cfunc: C function to be called
@@ -325,6 +333,9 @@ SYM_CODE_END(ret_from_fork)
*/
.macro idtentry_body cfunc has_error_code:req
+ call push_and_clear_regs
+ UNWIND_HINT_REGS
+
call error_entry
movq %rax, %rsp /* switch to the task stack if from userspace */
ENCODE_FRAME_POINTER
@@ -985,13 +996,11 @@ SYM_CODE_START_LOCAL(paranoid_exit)
SYM_CODE_END(paranoid_exit)
/*
- * Save all registers in pt_regs, and switch GS if needed.
+ * Switch GS and CR3 if needed.
*/
SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
cld
- PUSH_AND_CLEAR_REGS save_ret=1
- ENCODE_FRAME_POINTER 8
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()
2022-04-21 14:10 ` [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry() Lai Jiangshan
@ 2022-04-27 17:45 ` Borislav Petkov
2022-04-28 0:33 ` Lai Jiangshan
0 siblings, 1 reply; 20+ messages in thread
From: Borislav Petkov @ 2022-04-27 17:45 UTC (permalink / raw)
To: Lai Jiangshan
Cc: linux-kernel, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin
On Thu, Apr 21, 2022 at 10:10:50PM +0800, Lai Jiangshan wrote:
> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>
> The macro idtentry calls error_entry() unconditionally even on XENPV.
> But the code XENPV needs in error_entry() is PUSH_AND_CLEAR_REGS only.
> And error_entry() also calls sync_regs() which has to deal with the
> case of XENPV via an extra branch so that it doesn't copy the pt_regs.
What extra branch?
Do you mean the
if (regs != eregs)
test in sync_regs()?
I'm confused. Are you, per chance, aiming to optimize XENPV here or
what's up?
> And PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to
> its original place when the function returns, which means it is not
> possible to convert it to a C function.
>
> Move PUSH_AND_CLEAR_REGS out of error_entry(), add a function to wrap
> PUSH_AND_CLEAR_REGS and call it before error_entry().
>
> The new function call adds two instructions (CALL and RET) for every
> interrupt or exception.
Not only - it pushes all the regs in PUSH_AND_CLEAR_REGS too. I don't
understand why that matters here? It was done in error_entry anyway.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()
2022-04-27 17:45 ` Borislav Petkov
@ 2022-04-28 0:33 ` Lai Jiangshan
2022-04-28 10:26 ` Borislav Petkov
2022-05-02 12:42 ` Juergen Gross
0 siblings, 2 replies; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-28 0:33 UTC (permalink / raw)
To: Borislav Petkov
Cc: LKML, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, X86 ML, Lai Jiangshan,
Ingo Molnar, Dave Hansen, H. Peter Anvin
On Thu, Apr 28, 2022 at 1:45 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Thu, Apr 21, 2022 at 10:10:50PM +0800, Lai Jiangshan wrote:
> > From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> >
> > The macro idtentry calls error_entry() unconditionally even on XENPV.
> > But the code XENPV needs in error_entry() is PUSH_AND_CLEAR_REGS only.
> > And error_entry() also calls sync_regs() which has to deal with the
> > case of XENPV via an extra branch so that it doesn't copy the pt_regs.
>
> What extra branch?
>
> Do you mean the
>
> if (regs != eregs)
>
> test in sync_regs()?
Hello, Borislav
Yes.
>
> I'm confused. Are you, per chance, aiming to optimize XENPV here or
> what's up?
The branch in sync_regs() can be optimized out for the non-XENPV case
since XENPV doesn't call sync_regs() after patch5 which makes XENPV
not call error_entry().
The aim of this patch and most of the patchset is to make
error_entry() be able to be converted to C. And XENPV cases can
also be optimized in the patchset although that is not the main aim.
>
> > And PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to
> > its original place when the function returns, which means it is not
> > possible to convert it to a C function.
> >
> > Move PUSH_AND_CLEAR_REGS out of error_entry(), add a function to wrap
> > PUSH_AND_CLEAR_REGS and call it before error_entry().
> >
> > The new function call adds two instructions (CALL and RET) for every
> > interrupt or exception.
>
> Not only - it pushes all the regs in PUSH_AND_CLEAR_REGS too. I don't
> understand why that matters here? It was done in error_entry anyway.
>
Compared to the original code, adding the new function call adds two
instructions (CALL and RET) for every interrupt or exception.
PUSH_AND_CLEAR_REGS is not extra instructions added here.
Since this patch adds extra overhead (CALL and RET), the changelog
has to explain why it is worth it not just for converting ASM to C.
The explanation in the changelog is that it can be offset by the
overhead reduced later.
Thanks
Lai
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()
2022-04-28 0:33 ` Lai Jiangshan
@ 2022-04-28 10:26 ` Borislav Petkov
2022-05-02 12:42 ` Juergen Gross
1 sibling, 0 replies; 20+ messages in thread
From: Borislav Petkov @ 2022-04-28 10:26 UTC (permalink / raw)
To: Lai Jiangshan
Cc: LKML, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, X86 ML, Lai Jiangshan,
Ingo Molnar, Dave Hansen, H. Peter Anvin
On Thu, Apr 28, 2022 at 08:33:36AM +0800, Lai Jiangshan wrote:
> The branch in sync_regs() can be optimized out for the non-XENPV case
> since XENPV doesn't call sync_regs() after patch5 which makes XENPV
> not call error_entry().
I find it confusing why you're even mentioning optimizing a simple test.
That thing is basically a *whopping* two fastpath instructions:
# arch/x86/kernel/traps.c:853: if (regs != eregs)
cmpq %rdi, %rax # eregs, <retval>
je .L255 #,
Is this such a hot path that it shows in some profiles and removing that
test there is so important?
> The aim of this patch and most of the patchset is to make
> error_entry() be able to be converted to C. And XENPV cases can
> also be optimized in the patchset although it is not the major main.
So say just that.
> Since this patch adds extra overhead (CALL and RET), the changelog
> has to explain why it is worth it not just for converting ASM to C.
What is the practical relevance of this very minor overhead? Does it
show even in a microbenchmark?
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()
2022-04-28 0:33 ` Lai Jiangshan
2022-04-28 10:26 ` Borislav Petkov
@ 2022-05-02 12:42 ` Juergen Gross
2022-05-02 18:09 ` Borislav Petkov
1 sibling, 1 reply; 20+ messages in thread
From: Juergen Gross @ 2022-05-02 12:42 UTC (permalink / raw)
To: Lai Jiangshan, Borislav Petkov
Cc: LKML, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, X86 ML, Lai Jiangshan, Ingo Molnar, Dave Hansen,
H. Peter Anvin
[-- Attachment #1.1.1: Type: text/plain, Size: 2655 bytes --]
On 28.04.22 02:33, Lai Jiangshan wrote:
> On Thu, Apr 28, 2022 at 1:45 AM Borislav Petkov <bp@alien8.de> wrote:
>>
>> On Thu, Apr 21, 2022 at 10:10:50PM +0800, Lai Jiangshan wrote:
>>> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>>>
>>> The macro idtentry calls error_entry() unconditionally even on XENPV.
>>> But the code XENPV needs in error_entry() is PUSH_AND_CLEAR_REGS only.
>>> And error_entry() also calls sync_regs() which has to deal with the
>>> case of XENPV via an extra branch so that it doesn't copy the pt_regs.
>>
>> What extra branch?
>>
>> Do you mean the
>>
>> if (regs != eregs)
>>
>> test in sync_regs()?
>
> Hello, Borislav
>
> Yes.
>
>>
>> I'm confused. Are you, per chance, aiming to optimize XENPV here or
>> what's up?
>
>
> The branch in sync_regs() can be optimized out for the non-XENPV case
> since XENPV doesn't call sync_regs() after patch5 which makes XENPV
> not call error_entry().
>
> The aim of this patch and most of the patchset is to make
> error_entry() be able to be converted to C. And XENPV cases can
> also be optimized in the patchset although that is not the main aim.
>
>>
>>> And PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to
>>> its original place when the function returns, which means it is not
>>> possible to convert it to a C function.
>>>
>>> Move PUSH_AND_CLEAR_REGS out of error_entry(), add a function to wrap
>>> PUSH_AND_CLEAR_REGS and call it before error_entry().
>>>
>>> The new function call adds two instructions (CALL and RET) for every
>>> interrupt or exception.
>>
>> Not only - it pushes all the regs in PUSH_AND_CLEAR_REGS too. I don't
>> understand why that matters here? It was done in error_entry anyway.
>>
>
> Compared to the original code, adding the new function call adds two
> instructions (CALL and RET) for every interrupt or exception.
>
> PUSH_AND_CLEAR_REGS is not extra instructions added here.
>
> Since this patch adds extra overhead (CALL and RET), the changelog
> has to explain why it is worth it not just for converting ASM to C.
>
> The explanation in the changelog is that it can be offset by the
> overhead reduced later.
I think you could avoid the extra call/ret by doing something like:
SYM_CODE_START_LOCAL(error_exit_push_and_save)
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
jmp error_exit
SYM_CODE_END(error_exit_push_and_save)
... and use this instead of patch 5:
ALTERNATIVE "call error_entry_push_and_save; movq %rax, %rsp", \
"call push_and_clear_regs", X86_FEATURE_XENPV
Juergen
[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()
2022-05-02 12:42 ` Juergen Gross
@ 2022-05-02 18:09 ` Borislav Petkov
0 siblings, 0 replies; 20+ messages in thread
From: Borislav Petkov @ 2022-05-02 18:09 UTC (permalink / raw)
To: Juergen Gross
Cc: Lai Jiangshan, LKML, Peter Zijlstra, Josh Poimboeuf,
Andy Lutomirski, Thomas Gleixner, X86 ML, Lai Jiangshan,
Ingo Molnar, Dave Hansen, H. Peter Anvin
On Mon, May 02, 2022 at 02:42:50PM +0200, Juergen Gross wrote:
> I think you could avoid the extra call/ret by doing something like:
>
> SYM_CODE_START_LOCAL(error_exit_push_and_save)
> UNWIND_HINT_FUNC
> PUSH_AND_CLEAR_REGS save_ret=1
> ENCODE_FRAME_POINTER 8
> jmp error_exit
> SYM_CODE_END(error_exit_push_and_save)
>
> ... and use this instead of patch 5:
>
> ALTERNATIVE "call error_entry_push_and_save; movq %rax, %rsp", \
> "call push_and_clear_regs", X86_FEATURE_XENPV
I'm afraid I can't follow - you still need to call error_entry on
!XENPV. You have a function error_entry_push_and_save() but above it is
called error_*exit*_push_and_save...
Bottomline is, this should be as simple as possible code - an additional
CALL/RET doesn't really matter, perf-wise.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH V6 4/8] x86/entry: Move cld to the start of idtentry macro
2022-04-21 14:10 [PATCH V6 0/8] x86/entry: Clean up entry code Lai Jiangshan
` (2 preceding siblings ...)
2022-04-21 14:10 ` [PATCH V6 3/8] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry() Lai Jiangshan
@ 2022-04-21 14:10 ` Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 5/8] x86/entry: Don't call error_entry() for XENPV Lai Jiangshan
` (3 subsequent siblings)
7 siblings, 0 replies; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-21 14:10 UTC (permalink / raw)
To: linux-kernel
Cc: Borislav Petkov, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Make it next to CLAC
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
arch/x86/entry/entry_64.S | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b1cef3b0a7ab..ab6ab6d3dab5 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -371,6 +371,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
ENDBR
ASM_CLAC
+ cld
.if \has_error_code == 0
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -439,6 +440,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS
ENDBR
ASM_CLAC
+ cld
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -495,6 +497,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS
ENDBR
ASM_CLAC
+ cld
/*
* If the entry is from userspace, switch stacks and treat it as
@@ -557,6 +560,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS offset=8
ENDBR
ASM_CLAC
+ cld
/* paranoid_entry returns GS information for paranoid_exit in EBX. */
call paranoid_entry
@@ -882,7 +886,6 @@ SYM_CODE_END(xen_failsafe_callback)
*/
SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
- cld
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
@@ -1000,7 +1003,6 @@ SYM_CODE_END(paranoid_exit)
*/
SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
- cld
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1134,6 +1136,7 @@ SYM_CODE_START(asm_exc_nmi)
*/
ASM_CLAC
+ cld
/* Use %rdx as our temp variable throughout */
pushq %rdx
@@ -1153,7 +1156,6 @@ SYM_CODE_START(asm_exc_nmi)
*/
swapgs
- cld
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH V6 5/8] x86/entry: Don't call error_entry() for XENPV
2022-04-21 14:10 [PATCH V6 0/8] x86/entry: Clean up entry code Lai Jiangshan
` (3 preceding siblings ...)
2022-04-21 14:10 ` [PATCH V6 4/8] x86/entry: Move cld to the start of idtentry macro Lai Jiangshan
@ 2022-04-21 14:10 ` Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS Lai Jiangshan
` (2 subsequent siblings)
7 siblings, 0 replies; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-21 14:10 UTC (permalink / raw)
To: linux-kernel
Cc: Borislav Petkov, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
When in XENPV, it is already in the task stack, and it can't fault for
native_iret() nor native_load_gs_index() since XENPV uses its own pvops
for IRET and load_gs_index(). And it doesn't need to switch the CR3.
So there is no reason to call error_entry() in XENPV.
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
arch/x86/entry/entry_64.S | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ab6ab6d3dab5..062aa9d95961 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -336,8 +336,17 @@ SYM_CODE_END(push_and_clear_regs)
call push_and_clear_regs
UNWIND_HINT_REGS
- call error_entry
- movq %rax, %rsp /* switch to the task stack if from userspace */
+ /*
+ * Call error_entry() and switch to the task stack if from userspace.
+ *
+ * When in XENPV, it is already in the task stack, and it can't fault
+ * for native_iret() nor native_load_gs_index() since XENPV uses its
+ * own pvops for IRET and load_gs_index(). And it doesn't need to
+ * switch the CR3. So it can skip invoking error_entry().
+ */
+ ALTERNATIVE "call error_entry; movq %rax, %rsp", \
+ "", X86_FEATURE_XENPV
+
ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS
2022-04-21 14:10 [PATCH V6 0/8] x86/entry: Clean up entry code Lai Jiangshan
` (4 preceding siblings ...)
2022-04-21 14:10 ` [PATCH V6 5/8] x86/entry: Don't call error_entry() for XENPV Lai Jiangshan
@ 2022-04-21 14:10 ` Lai Jiangshan
2022-04-29 9:56 ` Borislav Petkov
2022-04-21 14:10 ` [PATCH V6 7/8] x86/entry: Remove the branch in sync_regs() Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 8/8] x86/entry: Use idtentry macro for entry_INT80_compat() Lai Jiangshan
7 siblings, 1 reply; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-21 14:10 UTC (permalink / raw)
To: linux-kernel
Cc: Borislav Petkov, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin, Kirill A. Shutemov
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
XENPV doesn't use swapgs_restore_regs_and_return_to_usermode(),
error_entry() and entry_SYSENTER_compat().
Change the PV-compatible SWAPGS to the ASM instruction swapgs in these
functions.
Also remove the definition of SWAPGS since there is no user of the
SWAPGS anymore.
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
arch/x86/entry/entry_64.S | 6 +++---
arch/x86/entry/entry_64_compat.S | 2 +-
arch/x86/include/asm/irqflags.h | 8 --------
3 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 062aa9d95961..312186612f4e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1019,7 +1019,7 @@ SYM_CODE_START_LOCAL(error_entry)
* We entered from user mode or we're pretending to have entered
* from user mode due to an IRET fault.
*/
- SWAPGS
+ swapgs
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
@@ -1051,7 +1051,7 @@ SYM_CODE_START_LOCAL(error_entry)
* gsbase and proceed. We'll fix up the exception and land in
* .Lgs_change's error handler with kernel gsbase.
*/
- SWAPGS
+ swapgs
/*
* Issue an LFENCE to prevent GS speculation, regardless of whether it is a
@@ -1072,7 +1072,7 @@ SYM_CODE_START_LOCAL(error_entry)
* We came from an IRET to user mode, so we have user
* gsbase and CR3. Switch to kernel gsbase and CR3:
*/
- SWAPGS
+ swapgs
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 4fdb007cddbd..c5aeb0819707 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -50,7 +50,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
UNWIND_HINT_EMPTY
ENDBR
/* Interrupts are off on entry. */
- SWAPGS
+ swapgs
pushq %rax
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 111104d1c2cd..7793e52d6237 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -137,14 +137,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags)
if (!arch_irqs_disabled_flags(flags))
arch_local_irq_enable();
}
-#else
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_XEN_PV
-#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
-#else
-#define SWAPGS swapgs
-#endif
-#endif
#endif /* !__ASSEMBLY__ */
#endif
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS
2022-04-21 14:10 ` [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS Lai Jiangshan
@ 2022-04-29 9:56 ` Borislav Petkov
2022-04-29 11:45 ` Lai Jiangshan
2022-05-02 12:18 ` Juergen Gross
0 siblings, 2 replies; 20+ messages in thread
From: Borislav Petkov @ 2022-04-29 9:56 UTC (permalink / raw)
To: Lai Jiangshan, Jürgen Gross
Cc: linux-kernel, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin, Kirill A. Shutemov
On Thu, Apr 21, 2022 at 10:10:53PM +0800, Lai Jiangshan wrote:
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index 4fdb007cddbd..c5aeb0819707 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -50,7 +50,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
> UNWIND_HINT_EMPTY
> ENDBR
> /* Interrupts are off on entry. */
> - SWAPGS
> + swapgs
>
> pushq %rax
> SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
I'm not sure about this: why can't XENPV do a 32-bit syscall through the vdso?
Also, looking at this, Jürgen, it looks kinda spaghetti to me:
entry_SYSENTER_compat
...
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
"jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
then at that swapgs_restore_regs_and_return_to_usermode label:
#ifdef CONFIG_XEN_PV
ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
#endif
Can we simply jump directly to xenpv_restore_regs_and_return_to_usermode
from entry_SYSENTER_compat or is that CONFIG_DEBUG_ENTRY chunk there
needed?
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS
2022-04-29 9:56 ` Borislav Petkov
@ 2022-04-29 11:45 ` Lai Jiangshan
2022-04-29 12:22 ` Borislav Petkov
2022-05-02 12:18 ` Juergen Gross
1 sibling, 1 reply; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-29 11:45 UTC (permalink / raw)
To: Borislav Petkov
Cc: Jürgen Gross, LKML, Peter Zijlstra, Josh Poimboeuf,
Andy Lutomirski, Thomas Gleixner, X86 ML, Lai Jiangshan,
Ingo Molnar, Dave Hansen, H. Peter Anvin, Kirill A. Shutemov
On Fri, Apr 29, 2022 at 5:56 PM Borislav Petkov <bp@alien8.de> wrote:
>
> On Thu, Apr 21, 2022 at 10:10:53PM +0800, Lai Jiangshan wrote:
> > diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> > index 4fdb007cddbd..c5aeb0819707 100644
> > --- a/arch/x86/entry/entry_64_compat.S
> > +++ b/arch/x86/entry/entry_64_compat.S
> > @@ -50,7 +50,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
> > UNWIND_HINT_EMPTY
> > ENDBR
> > /* Interrupts are off on entry. */
> > - SWAPGS
> > + swapgs
> >
> > pushq %rax
> > SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
>
> I'm not sure about this: why can't XENPV do a 32-bit syscall through the vdso?
>
> Also, looking at this, Jürgen, it looks kinda spaghetti to me:
>
> entry_SYSENTER_compat
>
> ...
>
> /* XEN PV guests always use IRET path */
> ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
> "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
>
>
> then at that swapgs_restore_regs_and_return_to_usermode label:
>
> #ifdef CONFIG_XEN_PV
> ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
> #endif
>
>
> Can we simply jump directly to xenpv_restore_regs_and_return_to_usermode
> from entry_SYSENTER_compat or is that CONFIG_DEBUG_ENTRY chunk there
> needed?
Hello
Adding ALTERNATIVE in swapgs_restore_regs_and_return_to_usermode()
results in a simpler patch, which is better served as a bug fix:
https://lore.kernel.org/lkml/163861832551.11128.1645285137833652414.tip-bot2@tip-bot2/
which is also your suggestion:
https://lore.kernel.org/lkml/YYD9ohN2Zcy4EdMb@zn.tnic/
While the original patch has sprinkled all those ALTERNATIVE calls
everywhere:
https://lore.kernel.org/lkml/20211026141420.17138-5-jiangshanlai@gmail.com/
And this original version has a SYM_L_WEAK placeholder
which is objected to by some people.
Thanks
Lai
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS
2022-04-29 11:45 ` Lai Jiangshan
@ 2022-04-29 12:22 ` Borislav Petkov
0 siblings, 0 replies; 20+ messages in thread
From: Borislav Petkov @ 2022-04-29 12:22 UTC (permalink / raw)
To: Lai Jiangshan
Cc: Jürgen Gross, LKML, Peter Zijlstra, Josh Poimboeuf,
Andy Lutomirski, Thomas Gleixner, X86 ML, Lai Jiangshan,
Ingo Molnar, Dave Hansen, H. Peter Anvin, Kirill A. Shutemov
On Fri, Apr 29, 2022 at 07:45:43PM +0800, Lai Jiangshan wrote:
> Adding ALTERNATIVE in swapgs_restore_regs_and_return_to_usermode()
All I'm saying is to do this:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index c5aeb0819707..79f0511a4791 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -140,7 +140,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
call do_SYSENTER_32
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
- "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+ "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
jmp sysret32_from_system_call
.Lsysenter_fix_flags:
which eliminates one JMP in the XENPV case if the CONFIG_DEBUG_ENTRY
thing is not relevant to XENPV.
But let's see what Jürgen says first...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS
2022-04-29 9:56 ` Borislav Petkov
2022-04-29 11:45 ` Lai Jiangshan
@ 2022-05-02 12:18 ` Juergen Gross
2022-05-02 17:56 ` Borislav Petkov
1 sibling, 1 reply; 20+ messages in thread
From: Juergen Gross @ 2022-05-02 12:18 UTC (permalink / raw)
To: Borislav Petkov, Lai Jiangshan
Cc: linux-kernel, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, x86, Lai Jiangshan, Ingo Molnar, Dave Hansen,
H. Peter Anvin, Kirill A. Shutemov
[-- Attachment #1.1.1: Type: text/plain, Size: 1563 bytes --]
On 29.04.22 11:56, Borislav Petkov wrote:
> On Thu, Apr 21, 2022 at 10:10:53PM +0800, Lai Jiangshan wrote:
>> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
>> index 4fdb007cddbd..c5aeb0819707 100644
>> --- a/arch/x86/entry/entry_64_compat.S
>> +++ b/arch/x86/entry/entry_64_compat.S
>> @@ -50,7 +50,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
>> UNWIND_HINT_EMPTY
>> ENDBR
>> /* Interrupts are off on entry. */
>> - SWAPGS
>> + swapgs
>>
>> pushq %rax
>> SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
>
> I'm not sure about this: why can't XENPV do a 32-bit syscall through the vdso?
The syscall will land in the hypervisor, which then will activate
the related registered callback (xen_sysenter_target).
> Also, looking at this, Jürgen, it looks kinda spaghetti to me:
>
> entry_SYSENTER_compat
>
> ...
>
> /* XEN PV guests always use IRET path */
> ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
> "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
>
>
> then at that swapgs_restore_regs_and_return_to_usermode label:
>
> #ifdef CONFIG_XEN_PV
> ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
> #endif
>
>
> Can we simply jump directly to xenpv_restore_regs_and_return_to_usermode
> from entry_SYSENTER_compat or is that CONFIG_DEBUG_ENTRY chunk there
> needed?
I wouldn't insist on the CONFIG_DEBUG_ENTRY chunk.
Juergen
[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS
2022-05-02 12:18 ` Juergen Gross
@ 2022-05-02 17:56 ` Borislav Petkov
2022-05-03 10:43 ` Borislav Petkov
0 siblings, 1 reply; 20+ messages in thread
From: Borislav Petkov @ 2022-05-02 17:56 UTC (permalink / raw)
To: Juergen Gross
Cc: Lai Jiangshan, linux-kernel, Peter Zijlstra, Josh Poimboeuf,
Andy Lutomirski, Thomas Gleixner, x86, Lai Jiangshan,
Ingo Molnar, Dave Hansen, H. Peter Anvin, Kirill A. Shutemov
On Mon, May 02, 2022 at 02:18:35PM +0200, Juergen Gross wrote:
> The syscall will land in the hypervisor, which then will activate
> the related registered callback (xen_sysenter_target).
Aha, that would do a bit of a fixup and then go to the common label
entry_SYSENTER_compat_after_hwframe.
We probably should document this so that it is clear what uses those...
---
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9da5eef9affe..93065c4914ad 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -103,6 +103,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+
+ /*
+ * XENPV jumps here after frame fixup in the respective entry points in
+ * ...xen-asm.S.
+ */
SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
pushq %rax /* pt_regs->orig_ax */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index c5aeb0819707..b473f8e17afc 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -71,6 +71,11 @@ SYM_CODE_START(entry_SYSENTER_compat)
pushfq /* pt_regs->flags (except IF = 0) */
pushq $__USER32_CS /* pt_regs->cs */
pushq $0 /* pt_regs->ip = 0 (placeholder) */
+
+ /*
+ * XENPV jumps here after frame fixup in the respective entry points in
+ * ...xen-asm.S.
+ */
SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
/*
> I wouldn't insist on the CONFIG_DEBUG_ENTRY chunk.
Ok, simpler asm, cool, let's do it then.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS
2022-05-02 17:56 ` Borislav Petkov
@ 2022-05-03 10:43 ` Borislav Petkov
0 siblings, 0 replies; 20+ messages in thread
From: Borislav Petkov @ 2022-05-03 10:43 UTC (permalink / raw)
To: Juergen Gross
Cc: Lai Jiangshan, linux-kernel, Peter Zijlstra, Josh Poimboeuf,
Andy Lutomirski, Thomas Gleixner, x86, Lai Jiangshan,
Ingo Molnar, Dave Hansen, H. Peter Anvin, Kirill A. Shutemov
On Mon, May 02, 2022 at 07:56:16PM +0200, Borislav Petkov wrote:
> Ok, simpler asm, cool, let's do it then.
Yeah, just tried it, it needs more ifdeffery:
ld: arch/x86/entry/entry_64.o:/home/boris/kernel/linux/arch/x86/entry/entry_64.S:129: undefined reference to `xenpv_restore_regs_and_return_to_usermode'
ld: arch/x86/entry/entry_64_compat.o:(.altinstr_replacement+0x1): undefined reference to `xenpv_restore_regs_and_return_to_usermode'
make: *** [Makefile:1158: vmlinux] Error 1
Oh well.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH V6 7/8] x86/entry: Remove the branch in sync_regs()
2022-04-21 14:10 [PATCH V6 0/8] x86/entry: Clean up entry code Lai Jiangshan
` (5 preceding siblings ...)
2022-04-21 14:10 ` [PATCH V6 6/8] x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS Lai Jiangshan
@ 2022-04-21 14:10 ` Lai Jiangshan
2022-04-21 14:10 ` [PATCH V6 8/8] x86/entry: Use idtentry macro for entry_INT80_compat() Lai Jiangshan
7 siblings, 0 replies; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-21 14:10 UTC (permalink / raw)
To: linux-kernel
Cc: Borislav Petkov, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin, Kirill A. Shutemov, Fenghua Yu,
Chang S. Bae
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
In non-XENPV, the tss.sp0 is always the trampoline stack, and
sync_regs() is called on non-XENPV only since error_entry() is not
called on XENPV, so the stack must be the trampoline stack or one of
the IST stacks if it is from user mode or a bad iret.
Remove the check in sync_regs() and always copy the pt_regs from the
stack in the cpu entry area to the task stack.
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
arch/x86/kernel/traps.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 111b18d57a54..e1cb4c009d54 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -845,13 +845,13 @@ DEFINE_IDTENTRY_RAW(exc_int3)
/*
* Help handler running on a per-cpu (IST or entry trampoline) stack
* to switch to the normal thread stack if the interrupted code was in
- * user mode. The actual stack switch is done in entry_64.S
+ * user mode. The actual stack switch is done in entry_64.S.
*/
asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
- if (regs != eregs)
- *regs = *eregs;
+
+ *regs = *eregs;
return regs;
}
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH V6 8/8] x86/entry: Use idtentry macro for entry_INT80_compat()
2022-04-21 14:10 [PATCH V6 0/8] x86/entry: Clean up entry code Lai Jiangshan
` (6 preceding siblings ...)
2022-04-21 14:10 ` [PATCH V6 7/8] x86/entry: Remove the branch in sync_regs() Lai Jiangshan
@ 2022-04-21 14:10 ` Lai Jiangshan
7 siblings, 0 replies; 20+ messages in thread
From: Lai Jiangshan @ 2022-04-21 14:10 UTC (permalink / raw)
To: linux-kernel
Cc: Borislav Petkov, Peter Zijlstra, Josh Poimboeuf, Andy Lutomirski,
Thomas Gleixner, Juergen Gross, x86, Lai Jiangshan, Ingo Molnar,
Dave Hansen, H. Peter Anvin, Joerg Roedel, Kirill A. Shutemov,
Chang S. Bae, Kees Cook
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
entry_INT80_compat() is identical to the idtentry macro except for some
special handling of %rax in the prolog.
Add the prolog to idtentry and use idtentry for entry_INT80_compat().
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
arch/x86/entry/entry_64.S | 18 ++++++
arch/x86/entry/entry_64_compat.S | 103 -------------------------------
arch/x86/include/asm/idtentry.h | 47 ++++++++++++++
arch/x86/include/asm/proto.h | 4 --
4 files changed, 65 insertions(+), 107 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 312186612f4e..2bbecde5b228 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -386,6 +386,24 @@ SYM_CODE_START(\asmsym)
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
+ .if \vector == IA32_SYSCALL_VECTOR
+ /*
+ * User tracing code (ptrace or signal handlers) might assume
+ * that the saved RAX contains a 32-bit number when we're
+ * invoking a 32-bit syscall. Just in case the high bits are
+ * nonzero, zero-extend the syscall number. (This could almost
+ * certainly be deleted with no ill effects.)
+ */
+ movl %eax, %eax
+
+ /*
+ * do_int80_syscall_32() expects regs->orig_ax to be user ax,
+ * and regs->ax to be $-ENOSYS.
+ */
+ movq %rax, (%rsp)
+ movq $-ENOSYS, %rax
+ .endif
+
.if \vector == X86_TRAP_BP
/*
* If coming from kernel space, create a 6-word gap to allow the
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index c5aeb0819707..6866151bbef3 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -315,106 +315,3 @@ sysret32_from_system_call:
swapgs
sysretl
SYM_CODE_END(entry_SYSCALL_compat)
-
-/*
- * 32-bit legacy system call entry.
- *
- * 32-bit x86 Linux system calls traditionally used the INT $0x80
- * instruction. INT $0x80 lands here.
- *
- * This entry point can be used by 32-bit and 64-bit programs to perform
- * 32-bit system calls. Instances of INT $0x80 can be found inline in
- * various programs and libraries. It is also used by the vDSO's
- * __kernel_vsyscall fallback for hardware that doesn't support a faster
- * entry method. Restarted 32-bit system calls also fall back to INT
- * $0x80 regardless of what instruction was originally used to do the
- * system call.
- *
- * This is considered a slow path. It is not used by most libc
- * implementations on modern hardware except during process startup.
- *
- * Arguments:
- * eax system call number
- * ebx arg1
- * ecx arg2
- * edx arg3
- * esi arg4
- * edi arg5
- * ebp arg6
- */
-SYM_CODE_START(entry_INT80_compat)
- UNWIND_HINT_EMPTY
- ENDBR
- /*
- * Interrupts are off on entry.
- */
- ASM_CLAC /* Do this early to minimize exposure */
- SWAPGS
-
- /*
- * User tracing code (ptrace or signal handlers) might assume that
- * the saved RAX contains a 32-bit number when we're invoking a 32-bit
- * syscall. Just in case the high bits are nonzero, zero-extend
- * the syscall number. (This could almost certainly be deleted
- * with no ill effects.)
- */
- movl %eax, %eax
-
- /* switch to thread stack expects orig_ax and rdi to be pushed */
- pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
-
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-
- /* In the Xen PV case we already run on the thread stack. */
- ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
-
- movq %rsp, %rdi
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
- pushq 6*8(%rdi) /* regs->ss */
- pushq 5*8(%rdi) /* regs->rsp */
- pushq 4*8(%rdi) /* regs->eflags */
- pushq 3*8(%rdi) /* regs->cs */
- pushq 2*8(%rdi) /* regs->ip */
- pushq 1*8(%rdi) /* regs->orig_ax */
- pushq (%rdi) /* pt_regs->di */
-.Lint80_keep_stack:
-
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq %r10 /* pt_regs->r10*/
- xorl %r10d, %r10d /* nospec r10 */
- pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp */
- pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15 */
-
- UNWIND_HINT_REGS
-
- cld
-
- movq %rsp, %rdi
- call do_int80_syscall_32
- jmp swapgs_restore_regs_and_return_to_usermode
-SYM_CODE_END(entry_INT80_compat)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 72184b0b2219..5bf8a01d31f3 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -206,6 +206,20 @@ __visible noinstr void func(struct pt_regs *regs, \
\
static noinline void __##func(struct pt_regs *regs, u32 vector)
+/**
+ * DECLARE_IDTENTRY_IA32_EMULATION - Declare functions for int80
+ * @vector: Vector number (ignored for C)
+ * @asm_func: Function name of the entry point
+ * @cfunc: The C handler called from the ASM entry point (ignored for C)
+ *
+ * Declares two functions:
+ * - The ASM entry point: asm_func
+ * - The XEN PV trap entry point: xen_##asm_func (maybe unused)
+ */
+#define DECLARE_IDTENTRY_IA32_EMULATION(vector, asm_func, cfunc) \
+ asmlinkage void asm_func(void); \
+ asmlinkage void xen_##asm_func(void)
+
/**
* DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points
* @vector: Vector number (ignored for C)
@@ -432,6 +446,35 @@ __visible noinstr void func(struct pt_regs *regs, \
#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \
idtentry vector asm_##func func has_error_code=1
+/*
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction. INT $0x80 lands here.
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
+ * various programs and libraries. It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method. Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp arg6
+ */
+#define DECLARE_IDTENTRY_IA32_EMULATION(vector, asm_func, cfunc) \
+ idtentry vector asm_func cfunc has_error_code=0
+
/* Special case for 32bit IRET 'trap'. Do not emit ASM code */
#define DECLARE_IDTENTRY_SW(vector, func)
@@ -642,6 +685,10 @@ DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt);
DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt);
#endif
+#ifdef CONFIG_IA32_EMULATION
+DECLARE_IDTENTRY_IA32_EMULATION(IA32_SYSCALL_VECTOR, entry_INT80_compat, do_int80_syscall_32);
+#endif
+
/* System vector entry points */
#ifdef CONFIG_X86_LOCAL_APIC
DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt);
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 0f899c8d7a4e..4700d1650410 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -28,10 +28,6 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_SYSCALL_compat_safe_stack(void);
-void entry_INT80_compat(void);
-#ifdef CONFIG_XEN_PV
-void xen_entry_INT80_compat(void);
-#endif
#endif
void x86_configure_nx(void);
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 20+ messages in thread