All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
@ 2017-08-08  3:59 Andy Lutomirski
  2017-08-09 15:39 ` Juergen Gross
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Andy Lutomirski @ 2017-08-08  3:59 UTC (permalink / raw)
  To: X86 ML, Juergen Gross
  Cc: Borislav Petkov, linux-kernel, xen-devel, Boris Ostrovsky,
	H. Peter Anvin, Andy Lutomirski

Xen's raw SYSCALL entries are much less weird than native.  Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.

This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
benefit from similar treatment.

This makes one change to the native code path: the compat
instruction that clears the high 32 bits of %rax is moved slightly
later.  I'd be surprised if this affects performance at all.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---

Changes from v1 (which I never actually emailed):
 - Fix zero-extension in the compat case.

 arch/x86/entry/entry_64.S        |  9 ++-------
 arch/x86/entry/entry_64_compat.S |  7 +++----
 arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
 3 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index aa58155187c5..7cee92cf807f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
 	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
 	 * it is too small to ever cause noticeable irq latency.
 	 */
-	SWAPGS_UNSAFE_STACK
-	/*
-	 * A hypervisor implementation might want to use a label
-	 * after the swapgs, so that it can do the swapgs
-	 * for the guest and jump here on syscall.
-	 */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
 
+	swapgs
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r11				/* pt_regs->flags */
 	pushq	$__USER_CS			/* pt_regs->cs */
 	pushq	%rcx				/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
 	pushq	%rax				/* pt_regs->orig_ax */
 	pushq	%rdi				/* pt_regs->di */
 	pushq	%rsi				/* pt_regs->si */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..5314d7b8e5ad 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
  */
 ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	swapgs
 
 	/* Stash user ESP and switch to the kernel stack. */
 	movl	%esp, %r8d
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	/* Zero-extending 32-bit regs, do not remove */
-	movl	%eax, %eax
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
 	pushq	%r8			/* pt_regs->sp */
 	pushq	%r11			/* pt_regs->flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	pushq	%rcx			/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+	movl	%eax, %eax		/* discard orig_ax high bits */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..a8a4f4c460a6 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
  *	rip
  *	r11
  * rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
  */
 
-.macro undo_xen_syscall
-	mov 0*8(%rsp), %rcx
-	mov 1*8(%rsp), %r11
-	mov 5*8(%rsp), %rsp
-.endm
-
 /* Normal 64-bit system call target */
 ENTRY(xen_syscall_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_64_after_swapgs
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_64_after_hwframe
 ENDPROC(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
 /* 32-bit compat syscall target */
 ENTRY(xen_syscall32_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_compat
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
 ENTRY(xen_sysenter_target)
-	undo_xen_syscall
+	mov 0*8(%rsp), %rcx
+	mov 1*8(%rsp), %r11
+	mov 5*8(%rsp), %rsp
 	jmp entry_SYSENTER_compat
 ENDPROC(xen_sysenter_target)
 
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-08  3:59 [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
@ 2017-08-09 15:39 ` Juergen Gross
  2017-08-09 15:39 ` Juergen Gross
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 15+ messages in thread
From: Juergen Gross @ 2017-08-09 15:39 UTC (permalink / raw)
  To: Andy Lutomirski, X86 ML
  Cc: Borislav Petkov, linux-kernel, xen-devel, Boris Ostrovsky,
	H. Peter Anvin

On 08/08/17 05:59, Andy Lutomirski wrote:
> Xen's raw SYSCALL entries are much less weird than native.  Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
> 
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
> benefit from similar treatment.
> 
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later.  I'd be surprised if this affects performance at all.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Reviewed-by: Juergen Gross <jgross@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>


Thanks,

Juergen

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-08  3:59 [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
  2017-08-09 15:39 ` Juergen Gross
@ 2017-08-09 15:39 ` Juergen Gross
  2017-08-10 12:22 ` [tip:x86/asm] " tip-bot for Andy Lutomirski
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 15+ messages in thread
From: Juergen Gross @ 2017-08-09 15:39 UTC (permalink / raw)
  To: Andy Lutomirski, X86 ML
  Cc: xen-devel, Boris Ostrovsky, Borislav Petkov, linux-kernel,
	H. Peter Anvin

On 08/08/17 05:59, Andy Lutomirski wrote:
> Xen's raw SYSCALL entries are much less weird than native.  Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
> 
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
> benefit from similar treatment.
> 
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later.  I'd be surprised if this affects performance at all.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Reviewed-by: Juergen Gross <jgross@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>


Thanks,

Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [tip:x86/asm] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-08  3:59 [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
  2017-08-09 15:39 ` Juergen Gross
  2017-08-09 15:39 ` Juergen Gross
@ 2017-08-10 12:22 ` tip-bot for Andy Lutomirski
  2017-08-14  2:44 ` [PATCH v2] " Brian Gerst
  2017-08-14  2:44 ` Brian Gerst
  4 siblings, 0 replies; 15+ messages in thread
From: tip-bot for Andy Lutomirski @ 2017-08-10 12:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bpetkov, tglx, luto, boris.ostrovsky, jgross, mingo, torvalds,
	hpa, peterz, linux-kernel

Commit-ID:  8a9949bc71a71b3dd633255ebe8f8869b1f73474
Gitweb:     http://git.kernel.org/tip/8a9949bc71a71b3dd633255ebe8f8869b1f73474
Author:     Andy Lutomirski <luto@kernel.org>
AuthorDate: Mon, 7 Aug 2017 20:59:21 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 10 Aug 2017 13:14:32 +0200

x86/xen/64: Rearrange the SYSCALL entries

Xen's raw SYSCALL entries are much less weird than native.  Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.

This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
benefit from similar treatment.

This makes one change to the native code path: the compat
instruction that clears the high 32 bits of %rax is moved slightly
later.  I'd be surprised if this affects performance at all.

Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/entry_64.S        |  9 ++-------
 arch/x86/entry/entry_64_compat.S |  7 +++----
 arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
 3 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 64b233a..4dbb336 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
 	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
 	 * it is too small to ever cause noticeable irq latency.
 	 */
-	SWAPGS_UNSAFE_STACK
-	/*
-	 * A hypervisor implementation might want to use a label
-	 * after the swapgs, so that it can do the swapgs
-	 * for the guest and jump here on syscall.
-	 */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
 
+	swapgs
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r11				/* pt_regs->flags */
 	pushq	$__USER_CS			/* pt_regs->cs */
 	pushq	%rcx				/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
 	pushq	%rax				/* pt_regs->orig_ax */
 	pushq	%rdi				/* pt_regs->di */
 	pushq	%rsi				/* pt_regs->si */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721da..5314d7b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
  */
 ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	swapgs
 
 	/* Stash user ESP and switch to the kernel stack. */
 	movl	%esp, %r8d
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	/* Zero-extending 32-bit regs, do not remove */
-	movl	%eax, %eax
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
 	pushq	%r8			/* pt_regs->sp */
 	pushq	%r11			/* pt_regs->flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	pushq	%rcx			/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+	movl	%eax, %eax		/* discard orig_ax high bits */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df431..a8a4f4c 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
  *	rip
  *	r11
  * rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
  */
 
-.macro undo_xen_syscall
-	mov 0*8(%rsp), %rcx
-	mov 1*8(%rsp), %r11
-	mov 5*8(%rsp), %rsp
-.endm
-
 /* Normal 64-bit system call target */
 ENTRY(xen_syscall_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_64_after_swapgs
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_64_after_hwframe
 ENDPROC(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
 /* 32-bit compat syscall target */
 ENTRY(xen_syscall32_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_compat
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
 ENTRY(xen_sysenter_target)
-	undo_xen_syscall
+	mov 0*8(%rsp), %rcx
+	mov 1*8(%rsp), %r11
+	mov 5*8(%rsp), %rsp
 	jmp entry_SYSENTER_compat
 ENDPROC(xen_sysenter_target)
 

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-08  3:59 [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
                   ` (3 preceding siblings ...)
  2017-08-14  2:44 ` [PATCH v2] " Brian Gerst
@ 2017-08-14  2:44 ` Brian Gerst
  2017-08-14  5:53   ` Andy Lutomirski
  2017-08-14  5:53   ` Andy Lutomirski
  4 siblings, 2 replies; 15+ messages in thread
From: Brian Gerst @ 2017-08-14  2:44 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, Juergen Gross, Borislav Petkov,
	Linux Kernel Mailing List, xen-devel, Boris Ostrovsky,
	H. Peter Anvin

On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
> Xen's raw SYSCALL entries are much less weird than native.  Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
>
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
> benefit from similar treatment.
>
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later.  I'd be surprised if this affects performance at all.
>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>
> Changes from v1 (which I never actually emailed):
>  - Fix zero-extension in the compat case.
>
>  arch/x86/entry/entry_64.S        |  9 ++-------
>  arch/x86/entry/entry_64_compat.S |  7 +++----
>  arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
>  3 files changed, 14 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index aa58155187c5..7cee92cf807f 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
>          * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
>          * it is too small to ever cause noticeable irq latency.
>          */
> -       SWAPGS_UNSAFE_STACK
> -       /*
> -        * A hypervisor implementation might want to use a label
> -        * after the swapgs, so that it can do the swapgs
> -        * for the guest and jump here on syscall.
> -        */
> -GLOBAL(entry_SYSCALL_64_after_swapgs)
>
> +       swapgs
>         movq    %rsp, PER_CPU_VAR(rsp_scratch)
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> @@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
>         pushq   %r11                            /* pt_regs->flags */
>         pushq   $__USER_CS                      /* pt_regs->cs */
>         pushq   %rcx                            /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_64_after_hwframe)
>         pushq   %rax                            /* pt_regs->orig_ax */
>         pushq   %rdi                            /* pt_regs->di */
>         pushq   %rsi                            /* pt_regs->si */
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index e1721dafbcb1..5314d7b8e5ad 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
>   */
>  ENTRY(entry_SYSCALL_compat)
>         /* Interrupts are off on entry. */
> -       SWAPGS_UNSAFE_STACK
> +       swapgs
>
>         /* Stash user ESP and switch to the kernel stack. */
>         movl    %esp, %r8d
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> -       /* Zero-extending 32-bit regs, do not remove */
> -       movl    %eax, %eax
> -
>         /* Construct struct pt_regs on stack */
>         pushq   $__USER32_DS            /* pt_regs->ss */
>         pushq   %r8                     /* pt_regs->sp */
>         pushq   %r11                    /* pt_regs->flags */
>         pushq   $__USER32_CS            /* pt_regs->cs */
>         pushq   %rcx                    /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_compat_after_hwframe)
> +       movl    %eax, %eax              /* discard orig_ax high bits */
>         pushq   %rax                    /* pt_regs->orig_ax */
>         pushq   %rdi                    /* pt_regs->di */
>         pushq   %rsi                    /* pt_regs->si */
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index c3df43141e70..a8a4f4c460a6 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
>   *     rip
>   *     r11
>   * rsp->rcx
> - *
> - * In all the entrypoints, we undo all that to make it look like a
> - * CPU-generated syscall/sysenter and jump to the normal entrypoint.
>   */
>
> -.macro undo_xen_syscall
> -       mov 0*8(%rsp), %rcx
> -       mov 1*8(%rsp), %r11
> -       mov 5*8(%rsp), %rsp
> -.endm
> -
>  /* Normal 64-bit system call target */
>  ENTRY(xen_syscall_target)
> -       undo_xen_syscall
> -       jmp entry_SYSCALL_64_after_swapgs
> +       popq %rcx
> +       popq %r11
> +       jmp entry_SYSCALL_64_after_hwframe
>  ENDPROC(xen_syscall_target)
>
>  #ifdef CONFIG_IA32_EMULATION
>
>  /* 32-bit compat syscall target */
>  ENTRY(xen_syscall32_target)
> -       undo_xen_syscall
> -       jmp entry_SYSCALL_compat
> +       popq %rcx
> +       popq %r11
> +       jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
>  /* 32-bit compat sysenter target */
>  ENTRY(xen_sysenter_target)
> -       undo_xen_syscall
> +       mov 0*8(%rsp), %rcx
> +       mov 1*8(%rsp), %r11
> +       mov 5*8(%rsp), %rsp
>         jmp entry_SYSENTER_compat
>  ENDPROC(xen_sysenter_target)

This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
"parent: write to 0x80 (should fail)", and the fault isn't caught by
the signal handler.  It just dumps back to the shell.  The tests pass
after reverting this.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-08  3:59 [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
                   ` (2 preceding siblings ...)
  2017-08-10 12:22 ` [tip:x86/asm] " tip-bot for Andy Lutomirski
@ 2017-08-14  2:44 ` Brian Gerst
  2017-08-14  2:44 ` Brian Gerst
  4 siblings, 0 replies; 15+ messages in thread
From: Brian Gerst @ 2017-08-14  2:44 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Juergen Gross, X86 ML, Linux Kernel Mailing List, H. Peter Anvin,
	xen-devel, Boris Ostrovsky, Borislav Petkov

On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
> Xen's raw SYSCALL entries are much less weird than native.  Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
>
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
> benefit from similar treatment.
>
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later.  I'd be surprised if this affects performance at all.
>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>
> Changes from v1 (which I never actually emailed):
>  - Fix zero-extension in the compat case.
>
>  arch/x86/entry/entry_64.S        |  9 ++-------
>  arch/x86/entry/entry_64_compat.S |  7 +++----
>  arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
>  3 files changed, 14 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index aa58155187c5..7cee92cf807f 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
>          * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
>          * it is too small to ever cause noticeable irq latency.
>          */
> -       SWAPGS_UNSAFE_STACK
> -       /*
> -        * A hypervisor implementation might want to use a label
> -        * after the swapgs, so that it can do the swapgs
> -        * for the guest and jump here on syscall.
> -        */
> -GLOBAL(entry_SYSCALL_64_after_swapgs)
>
> +       swapgs
>         movq    %rsp, PER_CPU_VAR(rsp_scratch)
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> @@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
>         pushq   %r11                            /* pt_regs->flags */
>         pushq   $__USER_CS                      /* pt_regs->cs */
>         pushq   %rcx                            /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_64_after_hwframe)
>         pushq   %rax                            /* pt_regs->orig_ax */
>         pushq   %rdi                            /* pt_regs->di */
>         pushq   %rsi                            /* pt_regs->si */
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index e1721dafbcb1..5314d7b8e5ad 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
>   */
>  ENTRY(entry_SYSCALL_compat)
>         /* Interrupts are off on entry. */
> -       SWAPGS_UNSAFE_STACK
> +       swapgs
>
>         /* Stash user ESP and switch to the kernel stack. */
>         movl    %esp, %r8d
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> -       /* Zero-extending 32-bit regs, do not remove */
> -       movl    %eax, %eax
> -
>         /* Construct struct pt_regs on stack */
>         pushq   $__USER32_DS            /* pt_regs->ss */
>         pushq   %r8                     /* pt_regs->sp */
>         pushq   %r11                    /* pt_regs->flags */
>         pushq   $__USER32_CS            /* pt_regs->cs */
>         pushq   %rcx                    /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_compat_after_hwframe)
> +       movl    %eax, %eax              /* discard orig_ax high bits */
>         pushq   %rax                    /* pt_regs->orig_ax */
>         pushq   %rdi                    /* pt_regs->di */
>         pushq   %rsi                    /* pt_regs->si */
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index c3df43141e70..a8a4f4c460a6 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
>   *     rip
>   *     r11
>   * rsp->rcx
> - *
> - * In all the entrypoints, we undo all that to make it look like a
> - * CPU-generated syscall/sysenter and jump to the normal entrypoint.
>   */
>
> -.macro undo_xen_syscall
> -       mov 0*8(%rsp), %rcx
> -       mov 1*8(%rsp), %r11
> -       mov 5*8(%rsp), %rsp
> -.endm
> -
>  /* Normal 64-bit system call target */
>  ENTRY(xen_syscall_target)
> -       undo_xen_syscall
> -       jmp entry_SYSCALL_64_after_swapgs
> +       popq %rcx
> +       popq %r11
> +       jmp entry_SYSCALL_64_after_hwframe
>  ENDPROC(xen_syscall_target)
>
>  #ifdef CONFIG_IA32_EMULATION
>
>  /* 32-bit compat syscall target */
>  ENTRY(xen_syscall32_target)
> -       undo_xen_syscall
> -       jmp entry_SYSCALL_compat
> +       popq %rcx
> +       popq %r11
> +       jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
>  /* 32-bit compat sysenter target */
>  ENTRY(xen_sysenter_target)
> -       undo_xen_syscall
> +       mov 0*8(%rsp), %rcx
> +       mov 1*8(%rsp), %r11
> +       mov 5*8(%rsp), %rsp
>         jmp entry_SYSENTER_compat
>  ENDPROC(xen_sysenter_target)

This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
"parent: write to 0x80 (should fail)", and the fault isn't caught by
the signal handler.  It just dumps back to the shell.  The tests pass
after reverting this.

--
Brian Gerst

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-14  2:44 ` Brian Gerst
@ 2017-08-14  5:53   ` Andy Lutomirski
  2017-08-14  6:42     ` Andy Lutomirski
                       ` (5 more replies)
  2017-08-14  5:53   ` Andy Lutomirski
  1 sibling, 6 replies; 15+ messages in thread
From: Andy Lutomirski @ 2017-08-14  5:53 UTC (permalink / raw)
  To: Brian Gerst, Andrew Cooper
  Cc: Andy Lutomirski, X86 ML, Juergen Gross, Borislav Petkov,
	Linux Kernel Mailing List, xen-devel, Boris Ostrovsky,
	H. Peter Anvin

On Sun, Aug 13, 2017 at 7:44 PM, Brian Gerst <brgerst@gmail.com> wrote:
> On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>  /* Normal 64-bit system call target */
>>  ENTRY(xen_syscall_target)
>> -       undo_xen_syscall
>> -       jmp entry_SYSCALL_64_after_swapgs
>> +       popq %rcx
>> +       popq %r11
>> +       jmp entry_SYSCALL_64_after_hwframe
>>  ENDPROC(xen_syscall_target)
>>
>>  #ifdef CONFIG_IA32_EMULATION
>>
>>  /* 32-bit compat syscall target */
>>  ENTRY(xen_syscall32_target)
>> -       undo_xen_syscall
>> -       jmp entry_SYSCALL_compat
>> +       popq %rcx
>> +       popq %r11
>> +       jmp entry_SYSCALL_compat_after_hwframe
>>  ENDPROC(xen_syscall32_target)
>>
>>  /* 32-bit compat sysenter target */
>>  ENTRY(xen_sysenter_target)
>> -       undo_xen_syscall
>> +       mov 0*8(%rsp), %rcx
>> +       mov 1*8(%rsp), %r11
>> +       mov 5*8(%rsp), %rsp
>>         jmp entry_SYSENTER_compat
>>  ENDPROC(xen_sysenter_target)
>
> This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
> 64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
> "parent: write to 0x80 (should fail)", and the fault isn't caught by
> the signal handler.  It just dumps back to the shell.  The tests pass
> after reverting this.

I can reproduce it if I emulate an AMD machine.  I can "fix" it like this:

diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index a8a4f4c460a6..6255e00f425e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -97,6 +97,9 @@ ENDPROC(xen_syscall_target)
 ENTRY(xen_syscall32_target)
        popq %rcx
        popq %r11
+       movq $__USER32_DS, 4*8(%rsp)
+       movq $__USER32_CS, 1*8(%rsp)
+       movq %r11, 2*8(%rsp)
        jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)

but I haven't tried to diagnose precisely what's going on.

Xen seems to be putting the 0xe0?? values in ss and cs, which oughtn't
to be a problem, but it kills opportunistic sysretl.  Maybe that's
triggering a preexisting bug?

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-14  2:44 ` Brian Gerst
  2017-08-14  5:53   ` Andy Lutomirski
@ 2017-08-14  5:53   ` Andy Lutomirski
  1 sibling, 0 replies; 15+ messages in thread
From: Andy Lutomirski @ 2017-08-14  5:53 UTC (permalink / raw)
  To: Brian Gerst, Andrew Cooper
  Cc: Juergen Gross, X86 ML, Linux Kernel Mailing List,
	Andy Lutomirski, H. Peter Anvin, xen-devel, Boris Ostrovsky,
	Borislav Petkov

On Sun, Aug 13, 2017 at 7:44 PM, Brian Gerst <brgerst@gmail.com> wrote:
> On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>  /* Normal 64-bit system call target */
>>  ENTRY(xen_syscall_target)
>> -       undo_xen_syscall
>> -       jmp entry_SYSCALL_64_after_swapgs
>> +       popq %rcx
>> +       popq %r11
>> +       jmp entry_SYSCALL_64_after_hwframe
>>  ENDPROC(xen_syscall_target)
>>
>>  #ifdef CONFIG_IA32_EMULATION
>>
>>  /* 32-bit compat syscall target */
>>  ENTRY(xen_syscall32_target)
>> -       undo_xen_syscall
>> -       jmp entry_SYSCALL_compat
>> +       popq %rcx
>> +       popq %r11
>> +       jmp entry_SYSCALL_compat_after_hwframe
>>  ENDPROC(xen_syscall32_target)
>>
>>  /* 32-bit compat sysenter target */
>>  ENTRY(xen_sysenter_target)
>> -       undo_xen_syscall
>> +       mov 0*8(%rsp), %rcx
>> +       mov 1*8(%rsp), %r11
>> +       mov 5*8(%rsp), %rsp
>>         jmp entry_SYSENTER_compat
>>  ENDPROC(xen_sysenter_target)
>
> This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
> 64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
> "parent: write to 0x80 (should fail)", and the fault isn't caught by
> the signal handler.  It just dumps back to the shell.  The tests pass
> after reverting this.

I can reproduce it if I emulate an AMD machine.  I can "fix" it like this:

diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index a8a4f4c460a6..6255e00f425e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -97,6 +97,9 @@ ENDPROC(xen_syscall_target)
 ENTRY(xen_syscall32_target)
        popq %rcx
        popq %r11
+       movq $__USER32_DS, 4*8(%rsp)
+       movq $__USER32_CS, 1*8(%rsp)
+       movq %r11, 2*8(%rsp)
        jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)

but I haven't tried to diagnose precisely what's going on.

Xen seems to be putting the 0xe0?? values in ss and cs, which oughtn't
to be a problem, but it kills opportunistic sysretl.  Maybe that's
triggering a preexisting bug?

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-14  5:53   ` Andy Lutomirski
@ 2017-08-14  6:42     ` Andy Lutomirski
  2017-08-14  6:42     ` Andy Lutomirski
                       ` (4 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Andy Lutomirski @ 2017-08-14  6:42 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Brian Gerst, Andrew Cooper, X86 ML, Juergen Gross,
	Borislav Petkov, Linux Kernel Mailing List, xen-devel,
	Boris Ostrovsky, H. Peter Anvin

On Sun, Aug 13, 2017 at 10:53 PM, Andy Lutomirski <luto@kernel.org> wrote:
> On Sun, Aug 13, 2017 at 7:44 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>>  /* Normal 64-bit system call target */
>>>  ENTRY(xen_syscall_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_64_after_swapgs
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_64_after_hwframe
>>>  ENDPROC(xen_syscall_target)
>>>
>>>  #ifdef CONFIG_IA32_EMULATION
>>>
>>>  /* 32-bit compat syscall target */
>>>  ENTRY(xen_syscall32_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_compat
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_compat_after_hwframe
>>>  ENDPROC(xen_syscall32_target)
>>>
>>>  /* 32-bit compat sysenter target */
>>>  ENTRY(xen_sysenter_target)
>>> -       undo_xen_syscall
>>> +       mov 0*8(%rsp), %rcx
>>> +       mov 1*8(%rsp), %r11
>>> +       mov 5*8(%rsp), %rsp
>>>         jmp entry_SYSENTER_compat
>>>  ENDPROC(xen_sysenter_target)
>>
>> This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
>> 64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
>> "parent: write to 0x80 (should fail)", and the fault isn't caught by
>> the signal handler.  It just dumps back to the shell.  The tests pass
>> after reverting this.
>
> I can reproduce it if I emulate an AMD machine.  I can "fix" it like this:
>
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index a8a4f4c460a6..6255e00f425e 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -97,6 +97,9 @@ ENDPROC(xen_syscall_target)
>  ENTRY(xen_syscall32_target)
>         popq %rcx
>         popq %r11
> +       movq $__USER32_DS, 4*8(%rsp)
> +       movq $__USER32_CS, 1*8(%rsp)
> +       movq %r11, 2*8(%rsp)
>         jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
> but I haven't tried to diagnose precisely what's going on.
>
> Xen seems to be putting the 0xe0?? values in ss and cs, which oughtn't
> to be a problem, but it kills opportunistic sysretl.  Maybe that's
> triggering a preexisting bug?

It is indeed triggering an existing but, but that bug is not a kernel
bug :)  It's this thing:

https://sourceware.org/bugzilla/show_bug.cgi?id=21269

See, we have this old legacy garbage in which, when running with
nonstandard SS, a certain special, otherwise nonsensical input to
sigaction() causes a stack switch.  Xen PV runs user code with a
nonstandard SS, and glibc accidentally passes this weird parameter to
sigaction() on a regular basis.  With this patch applied, the kernel
suddenly starts to *realize* that ss is weird, and boom.  (Or maybe it
increases the chance that SS is actually weird, since I'd expect this
to trip on #GP, not SYSCALL.  But I don't care quite enough to dig
further.)

Patch coming.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-14  5:53   ` Andy Lutomirski
  2017-08-14  6:42     ` Andy Lutomirski
@ 2017-08-14  6:42     ` Andy Lutomirski
  2017-08-14  6:45     ` Andrew Cooper
                       ` (3 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Andy Lutomirski @ 2017-08-14  6:42 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Juergen Gross, H. Peter Anvin, Brian Gerst, X86 ML,
	Linux Kernel Mailing List, Andrew Cooper, xen-devel,
	Boris Ostrovsky, Borislav Petkov

On Sun, Aug 13, 2017 at 10:53 PM, Andy Lutomirski <luto@kernel.org> wrote:
> On Sun, Aug 13, 2017 at 7:44 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>>  /* Normal 64-bit system call target */
>>>  ENTRY(xen_syscall_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_64_after_swapgs
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_64_after_hwframe
>>>  ENDPROC(xen_syscall_target)
>>>
>>>  #ifdef CONFIG_IA32_EMULATION
>>>
>>>  /* 32-bit compat syscall target */
>>>  ENTRY(xen_syscall32_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_compat
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_compat_after_hwframe
>>>  ENDPROC(xen_syscall32_target)
>>>
>>>  /* 32-bit compat sysenter target */
>>>  ENTRY(xen_sysenter_target)
>>> -       undo_xen_syscall
>>> +       mov 0*8(%rsp), %rcx
>>> +       mov 1*8(%rsp), %r11
>>> +       mov 5*8(%rsp), %rsp
>>>         jmp entry_SYSENTER_compat
>>>  ENDPROC(xen_sysenter_target)
>>
>> This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
>> 64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
>> "parent: write to 0x80 (should fail)", and the fault isn't caught by
>> the signal handler.  It just dumps back to the shell.  The tests pass
>> after reverting this.
>
> I can reproduce it if I emulate an AMD machine.  I can "fix" it like this:
>
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index a8a4f4c460a6..6255e00f425e 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -97,6 +97,9 @@ ENDPROC(xen_syscall_target)
>  ENTRY(xen_syscall32_target)
>         popq %rcx
>         popq %r11
> +       movq $__USER32_DS, 4*8(%rsp)
> +       movq $__USER32_CS, 1*8(%rsp)
> +       movq %r11, 2*8(%rsp)
>         jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
> but I haven't tried to diagnose precisely what's going on.
>
> Xen seems to be putting the 0xe0?? values in ss and cs, which oughtn't
> to be a problem, but it kills opportunistic sysretl.  Maybe that's
> triggering a preexisting bug?

It is indeed triggering an existing but, but that bug is not a kernel
bug :)  It's this thing:

https://sourceware.org/bugzilla/show_bug.cgi?id=21269

See, we have this old legacy garbage in which, when running with
nonstandard SS, a certain special, otherwise nonsensical input to
sigaction() causes a stack switch.  Xen PV runs user code with a
nonstandard SS, and glibc accidentally passes this weird parameter to
sigaction() on a regular basis.  With this patch applied, the kernel
suddenly starts to *realize* that ss is weird, and boom.  (Or maybe it
increases the chance that SS is actually weird, since I'd expect this
to trip on #GP, not SYSCALL.  But I don't care quite enough to dig
further.)

Patch coming.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-14  5:53   ` Andy Lutomirski
                       ` (2 preceding siblings ...)
  2017-08-14  6:45     ` Andrew Cooper
@ 2017-08-14  6:45     ` Andrew Cooper
  2017-08-14 12:46     ` Brian Gerst
  2017-08-14 12:46     ` Brian Gerst
  5 siblings, 0 replies; 15+ messages in thread
From: Andrew Cooper @ 2017-08-14  6:45 UTC (permalink / raw)
  To: Andy Lutomirski, Brian Gerst
  Cc: X86 ML, Juergen Gross, Borislav Petkov,
	Linux Kernel Mailing List, xen-devel, Boris Ostrovsky,
	H. Peter Anvin

On 14/08/2017 06:53, Andy Lutomirski wrote:
> On Sun, Aug 13, 2017 at 7:44 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>>  /* Normal 64-bit system call target */
>>>  ENTRY(xen_syscall_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_64_after_swapgs
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_64_after_hwframe
>>>  ENDPROC(xen_syscall_target)
>>>
>>>  #ifdef CONFIG_IA32_EMULATION
>>>
>>>  /* 32-bit compat syscall target */
>>>  ENTRY(xen_syscall32_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_compat
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_compat_after_hwframe
>>>  ENDPROC(xen_syscall32_target)
>>>
>>>  /* 32-bit compat sysenter target */
>>>  ENTRY(xen_sysenter_target)
>>> -       undo_xen_syscall
>>> +       mov 0*8(%rsp), %rcx
>>> +       mov 1*8(%rsp), %r11
>>> +       mov 5*8(%rsp), %rsp
>>>         jmp entry_SYSENTER_compat
>>>  ENDPROC(xen_sysenter_target)
>> This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
>> 64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
>> "parent: write to 0x80 (should fail)", and the fault isn't caught by
>> the signal handler.  It just dumps back to the shell.  The tests pass
>> after reverting this.
> I can reproduce it if I emulate an AMD machine.  I can "fix" it like this:
>
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index a8a4f4c460a6..6255e00f425e 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -97,6 +97,9 @@ ENDPROC(xen_syscall_target)
>  ENTRY(xen_syscall32_target)
>         popq %rcx
>         popq %r11
> +       movq $__USER32_DS, 4*8(%rsp)
> +       movq $__USER32_CS, 1*8(%rsp)
> +       movq %r11, 2*8(%rsp)
>         jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
> but I haven't tried to diagnose precisely what's going on.
>
> Xen seems to be putting the 0xe0?? values in ss and cs, which oughtn't
> to be a problem, but it kills opportunistic sysretl.  Maybe that's
> triggering a preexisting bug?

The reason %rcx/%r11 are extra on the stack is because Xen uses sysret
to get here.  This is part of the 64bit PV ABI.

%cs will be 0xe033 (FLAT_CS64) and %ss will be 0xe02b (FLAT_SS64).

I would expect it to kill opportunistic sysret, as Xen's and Linux's
idea of using sysret differ.

~Andrew

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-14  5:53   ` Andy Lutomirski
  2017-08-14  6:42     ` Andy Lutomirski
  2017-08-14  6:42     ` Andy Lutomirski
@ 2017-08-14  6:45     ` Andrew Cooper
  2017-08-14  6:45     ` Andrew Cooper
                       ` (2 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Andrew Cooper @ 2017-08-14  6:45 UTC (permalink / raw)
  To: Andy Lutomirski, Brian Gerst
  Cc: Juergen Gross, X86 ML, Linux Kernel Mailing List, H. Peter Anvin,
	xen-devel, Boris Ostrovsky, Borislav Petkov

On 14/08/2017 06:53, Andy Lutomirski wrote:
> On Sun, Aug 13, 2017 at 7:44 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>>  /* Normal 64-bit system call target */
>>>  ENTRY(xen_syscall_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_64_after_swapgs
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_64_after_hwframe
>>>  ENDPROC(xen_syscall_target)
>>>
>>>  #ifdef CONFIG_IA32_EMULATION
>>>
>>>  /* 32-bit compat syscall target */
>>>  ENTRY(xen_syscall32_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_compat
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_compat_after_hwframe
>>>  ENDPROC(xen_syscall32_target)
>>>
>>>  /* 32-bit compat sysenter target */
>>>  ENTRY(xen_sysenter_target)
>>> -       undo_xen_syscall
>>> +       mov 0*8(%rsp), %rcx
>>> +       mov 1*8(%rsp), %r11
>>> +       mov 5*8(%rsp), %rsp
>>>         jmp entry_SYSENTER_compat
>>>  ENDPROC(xen_sysenter_target)
>> This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
>> 64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
>> "parent: write to 0x80 (should fail)", and the fault isn't caught by
>> the signal handler.  It just dumps back to the shell.  The tests pass
>> after reverting this.
> I can reproduce it if I emulate an AMD machine.  I can "fix" it like this:
>
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index a8a4f4c460a6..6255e00f425e 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -97,6 +97,9 @@ ENDPROC(xen_syscall_target)
>  ENTRY(xen_syscall32_target)
>         popq %rcx
>         popq %r11
> +       movq $__USER32_DS, 4*8(%rsp)
> +       movq $__USER32_CS, 1*8(%rsp)
> +       movq %r11, 2*8(%rsp)
>         jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
> but I haven't tried to diagnose precisely what's going on.
>
> Xen seems to be putting the 0xe0?? values in ss and cs, which oughtn't
> to be a problem, but it kills opportunistic sysretl.  Maybe that's
> triggering a preexisting bug?

The reason %rcx/%r11 are extra on the stack is because Xen uses sysret
to get here.  This is part of the 64bit PV ABI.

%cs will be 0xe033 (FLAT_CS64) and %ss will be 0xe02b (FLAT_SS64).

I would expect it to kill opportunistic sysret, as Xen's and Linux's
idea of using sysret differ.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-14  5:53   ` Andy Lutomirski
                       ` (3 preceding siblings ...)
  2017-08-14  6:45     ` Andrew Cooper
@ 2017-08-14 12:46     ` Brian Gerst
  2017-08-14 12:46     ` Brian Gerst
  5 siblings, 0 replies; 15+ messages in thread
From: Brian Gerst @ 2017-08-14 12:46 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andrew Cooper, X86 ML, Juergen Gross, Borislav Petkov,
	Linux Kernel Mailing List, xen-devel, Boris Ostrovsky,
	H. Peter Anvin

On Mon, Aug 14, 2017 at 1:53 AM, Andy Lutomirski <luto@kernel.org> wrote:
> On Sun, Aug 13, 2017 at 7:44 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>>  /* Normal 64-bit system call target */
>>>  ENTRY(xen_syscall_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_64_after_swapgs
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_64_after_hwframe
>>>  ENDPROC(xen_syscall_target)
>>>
>>>  #ifdef CONFIG_IA32_EMULATION
>>>
>>>  /* 32-bit compat syscall target */
>>>  ENTRY(xen_syscall32_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_compat
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_compat_after_hwframe
>>>  ENDPROC(xen_syscall32_target)
>>>
>>>  /* 32-bit compat sysenter target */
>>>  ENTRY(xen_sysenter_target)
>>> -       undo_xen_syscall
>>> +       mov 0*8(%rsp), %rcx
>>> +       mov 1*8(%rsp), %r11
>>> +       mov 5*8(%rsp), %rsp
>>>         jmp entry_SYSENTER_compat
>>>  ENDPROC(xen_sysenter_target)
>>
>> This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
>> 64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
>> "parent: write to 0x80 (should fail)", and the fault isn't caught by
>> the signal handler.  It just dumps back to the shell.  The tests pass
>> after reverting this.
>
> I can reproduce it if I emulate an AMD machine.  I can "fix" it like this:

Yes, this is an AMD processor.

> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index a8a4f4c460a6..6255e00f425e 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -97,6 +97,9 @@ ENDPROC(xen_syscall_target)
>  ENTRY(xen_syscall32_target)
>         popq %rcx
>         popq %r11
> +       movq $__USER32_DS, 4*8(%rsp)
> +       movq $__USER32_CS, 1*8(%rsp)
> +       movq %r11, 2*8(%rsp)
>         jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
> but I haven't tried to diagnose precisely what's going on.
>
> Xen seems to be putting the 0xe0?? values in ss and cs, which oughtn't
> to be a problem, but it kills opportunistic sysretl.  Maybe that's
> triggering a preexisting bug?

Resetting the CS/SS values worked.  Looking at the Xen hypervisor
code, EFLAGS on the stack should already be set to the value in R11,
so that part doesn't appear necessary.

Shouldn't this also be done for the 64-bit SYSCALL entry, for
consistency with previous code?

--
Brian Gerst

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
  2017-08-14  5:53   ` Andy Lutomirski
                       ` (4 preceding siblings ...)
  2017-08-14 12:46     ` Brian Gerst
@ 2017-08-14 12:46     ` Brian Gerst
  5 siblings, 0 replies; 15+ messages in thread
From: Brian Gerst @ 2017-08-14 12:46 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Juergen Gross, Andrew Cooper, X86 ML, Linux Kernel Mailing List,
	H. Peter Anvin, xen-devel, Boris Ostrovsky, Borislav Petkov

On Mon, Aug 14, 2017 at 1:53 AM, Andy Lutomirski <luto@kernel.org> wrote:
> On Sun, Aug 13, 2017 at 7:44 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>>  /* Normal 64-bit system call target */
>>>  ENTRY(xen_syscall_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_64_after_swapgs
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_64_after_hwframe
>>>  ENDPROC(xen_syscall_target)
>>>
>>>  #ifdef CONFIG_IA32_EMULATION
>>>
>>>  /* 32-bit compat syscall target */
>>>  ENTRY(xen_syscall32_target)
>>> -       undo_xen_syscall
>>> -       jmp entry_SYSCALL_compat
>>> +       popq %rcx
>>> +       popq %r11
>>> +       jmp entry_SYSCALL_compat_after_hwframe
>>>  ENDPROC(xen_syscall32_target)
>>>
>>>  /* 32-bit compat sysenter target */
>>>  ENTRY(xen_sysenter_target)
>>> -       undo_xen_syscall
>>> +       mov 0*8(%rsp), %rcx
>>> +       mov 1*8(%rsp), %r11
>>> +       mov 5*8(%rsp), %rsp
>>>         jmp entry_SYSENTER_compat
>>>  ENDPROC(xen_sysenter_target)
>>
>> This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
>> 64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
>> "parent: write to 0x80 (should fail)", and the fault isn't caught by
>> the signal handler.  It just dumps back to the shell.  The tests pass
>> after reverting this.
>
> I can reproduce it if I emulate an AMD machine.  I can "fix" it like this:

Yes, this is an AMD processor.

> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index a8a4f4c460a6..6255e00f425e 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -97,6 +97,9 @@ ENDPROC(xen_syscall_target)
>  ENTRY(xen_syscall32_target)
>         popq %rcx
>         popq %r11
> +       movq $__USER32_DS, 4*8(%rsp)
> +       movq $__USER32_CS, 1*8(%rsp)
> +       movq %r11, 2*8(%rsp)
>         jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
> but I haven't tried to diagnose precisely what's going on.
>
> Xen seems to be putting the 0xe0?? values in ss and cs, which oughtn't
> to be a problem, but it kills opportunistic sysretl.  Maybe that's
> triggering a preexisting bug?

Resetting the CS/SS values worked.  Looking at the Xen hypervisor
code, EFLAGS on the stack should already be set to the value in R11,
so that part doesn't appear necessary.

Shouldn't this also be done for the 64-bit SYSCALL entry, for
consistency with previous code?

--
Brian Gerst

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries
@ 2017-08-08  3:59 Andy Lutomirski
  0 siblings, 0 replies; 15+ messages in thread
From: Andy Lutomirski @ 2017-08-08  3:59 UTC (permalink / raw)
  To: X86 ML, Juergen Gross
  Cc: linux-kernel, Andy Lutomirski, H. Peter Anvin, xen-devel,
	Boris Ostrovsky, Borislav Petkov

Xen's raw SYSCALL entries are much less weird than native.  Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.

This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
benefit from similar treatment.

This makes one change to the native code path: the compat
instruction that clears the high 32 bits of %rax is moved slightly
later.  I'd be surprised if this affects performance at all.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---

Changes from v1 (which I never actually emailed):
 - Fix zero-extension in the compat case.

 arch/x86/entry/entry_64.S        |  9 ++-------
 arch/x86/entry/entry_64_compat.S |  7 +++----
 arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
 3 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index aa58155187c5..7cee92cf807f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
 	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
 	 * it is too small to ever cause noticeable irq latency.
 	 */
-	SWAPGS_UNSAFE_STACK
-	/*
-	 * A hypervisor implementation might want to use a label
-	 * after the swapgs, so that it can do the swapgs
-	 * for the guest and jump here on syscall.
-	 */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
 
+	swapgs
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r11				/* pt_regs->flags */
 	pushq	$__USER_CS			/* pt_regs->cs */
 	pushq	%rcx				/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
 	pushq	%rax				/* pt_regs->orig_ax */
 	pushq	%rdi				/* pt_regs->di */
 	pushq	%rsi				/* pt_regs->si */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..5314d7b8e5ad 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
  */
 ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	swapgs
 
 	/* Stash user ESP and switch to the kernel stack. */
 	movl	%esp, %r8d
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	/* Zero-extending 32-bit regs, do not remove */
-	movl	%eax, %eax
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
 	pushq	%r8			/* pt_regs->sp */
 	pushq	%r11			/* pt_regs->flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	pushq	%rcx			/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+	movl	%eax, %eax		/* discard orig_ax high bits */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..a8a4f4c460a6 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
  *	rip
  *	r11
  * rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
  */
 
-.macro undo_xen_syscall
-	mov 0*8(%rsp), %rcx
-	mov 1*8(%rsp), %r11
-	mov 5*8(%rsp), %rsp
-.endm
-
 /* Normal 64-bit system call target */
 ENTRY(xen_syscall_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_64_after_swapgs
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_64_after_hwframe
 ENDPROC(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
 /* 32-bit compat syscall target */
 ENTRY(xen_syscall32_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_compat
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
 ENTRY(xen_sysenter_target)
-	undo_xen_syscall
+	mov 0*8(%rsp), %rcx
+	mov 1*8(%rsp), %r11
+	mov 5*8(%rsp), %rsp
 	jmp entry_SYSENTER_compat
 ENDPROC(xen_sysenter_target)
 
-- 
2.13.3


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-08-14 12:46 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-08  3:59 [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
2017-08-09 15:39 ` Juergen Gross
2017-08-09 15:39 ` Juergen Gross
2017-08-10 12:22 ` [tip:x86/asm] " tip-bot for Andy Lutomirski
2017-08-14  2:44 ` [PATCH v2] " Brian Gerst
2017-08-14  2:44 ` Brian Gerst
2017-08-14  5:53   ` Andy Lutomirski
2017-08-14  6:42     ` Andy Lutomirski
2017-08-14  6:42     ` Andy Lutomirski
2017-08-14  6:45     ` Andrew Cooper
2017-08-14  6:45     ` Andrew Cooper
2017-08-14 12:46     ` Brian Gerst
2017-08-14 12:46     ` Brian Gerst
2017-08-14  5:53   ` Andy Lutomirski
  -- strict thread matches above, loose matches on Subject: below --
2017-08-08  3:59 Andy Lutomirski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.