All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexander van Heukelum <heukelum@fastmail.fm>
To: Andy Lutomirski <luto@amacapital.net>,
	x86@kernel.org, linux-kernel@vger.kernel.org
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
	Oleg Nesterov <oleg@redhat.com>, Borislav Petkov <bp@suse.de>,
	Rik van Riel <riel@redhat.com>
Subject: [PATCHv2 4/4] x86_64, entry: Create IRET-compatible stack frame at syscall entry
Date: Sun, 18 Jan 2015 12:45:20 +0100	[thread overview]
Message-ID: <1421581520-2816-5-git-send-email-heukelum@fastmail.fm> (raw)
In-Reply-To: <1421581520-2816-1-git-send-email-heukelum@fastmail.fm>

Create an IRET-compatible stack frame (ss, rsp, rflags, cs, rip) at
syscall entry and use the values saved in it to return to user mode in
the sysret path. This removes the need for the FIXUP_TOP_OF_STACK and
RESTORE_TOP_OF_STACK macros.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
---
 arch/x86/kernel/entry_64.S | 75 +++++++++++++---------------------------------
 1 file changed, 21 insertions(+), 54 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6b95c2f..c4cb8f1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -33,8 +33,6 @@
  * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
- * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
- * frame that is otherwise undefined after a SYSCALL
  * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  * - idtentry - Define exception entry points.
  */
@@ -130,33 +128,6 @@ ENDPROC(native_usergs_sysret64)
 #endif
 
 /*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
- * fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
-	/* %rsp:at FRAMEEND */
-	.macro FIXUP_TOP_OF_STACK tmp offset=0
-	movq PER_CPU_VAR(old_rsp),\tmp
-	movq \tmp,RSP+\offset(%rsp)
-	movq $__USER_DS,SS+\offset(%rsp)
-	movq $__USER_CS,CS+\offset(%rsp)
-	movq RIP+\offset(%rsp),\tmp  /* get rip */
-	movq \tmp,RCX+\offset(%rsp)  /* copy it to rcx as sysret would do */
-	movq R11+\offset(%rsp),\tmp  /* get eflags */
-	movq \tmp,EFLAGS+\offset(%rsp)
-	.endm
-
-	.macro RESTORE_TOP_OF_STACK tmp offset=0
-	movq RSP+\offset(%rsp),\tmp
-	movq \tmp,PER_CPU_VAR(old_rsp)
-	movq EFLAGS+\offset(%rsp),\tmp
-	movq \tmp,R11+\offset(%rsp)
-	.endm
-
-/*
  * initial frame state for interrupts (and exceptions without error code)
  */
 	.macro EMPTY_FRAME start=1 offset=0
@@ -272,7 +243,6 @@ ENTRY(ret_from_fork)
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
 
-	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
@@ -339,10 +309,24 @@ GLOBAL(system_call_after_swapgs)
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 6*8, 0, rax_enosys=1	/* skip: hardware stackframe and orig_rax */
+	/*
+	 * Save user mode rsp (temporarily saved above in old_rsp),
+	 * rflags (%r11), rip (%rcx) and segments (fixed values) on
+	 * the stack as a regular interrupt frame.
+	 */
+	pushq_cfi $__USER_DS
+	/* CFI_REL_OFFSET ss, 0 */
+	pushq_cfi PER_CPU_VAR(old_rsp)
+	CFI_REL_OFFSET rsp, 0
+	pushq_cfi %r11 /* %r11 clobbered (userspace %rflags) */
+	/* CFI_REL_OFFSET rflags, 0 */
+	pushq_cfi $__USER_CS
+	/* CFI_REL_OFFSET cs, 0 */
+	pushq_cfi %rcx /* %rcx clobbered (userspace %rip) */
+	CFI_REL_OFFSET rip, 0
+
+	SAVE_ARGS 8, rax_enosys=1
 	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq  %rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,ARGOFFSET)
 	jnz tracesys
 system_call_fastpath:
@@ -362,7 +346,7 @@ system_call_fastpath:
  */
 ret_from_sys_call:
 	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,ARGOFFSET)
-	jnz int_ret_from_sys_call_fixup	/* Go the the slow path */
+	jnz int_ret_from_sys_call	/* Go to the slow path */
 
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -372,19 +356,16 @@ ret_from_sys_call:
 	 * sysretq will re-enable interrupts:
 	 */
 	TRACE_IRQS_ON
+	RESTORE_ARGS addskip=-ARG_SKIP, rstor_rcx=0, rstor_r11=0
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
-	RESTORE_ARGS 1,-ARG_SKIP,0
+	mov EFLAGS-ARGOFFSET(%rsp), %r11
 	/*CFI_REGISTER	rflags,r11*/
-	movq	PER_CPU_VAR(old_rsp), %rsp
+	mov RSP-ARGOFFSET(%rsp), %rsp
 	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
 
-int_ret_from_sys_call_fixup:
-	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-	jmp int_ret_from_sys_call
-
 	/* Do syscall tracing */
 tracesys:
 	leaq -REST_SKIP(%rsp), %rdi
@@ -397,7 +378,6 @@ tracesys:
 
 tracesys_phase2:
 	SAVE_REST
-	FIXUP_TOP_OF_STACK %rdi
 	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
 	movq %rax,%rdx
@@ -493,10 +473,8 @@ ENTRY(stub_\func)
 	PARTIAL_FRAME 0
 	SAVE_REST
 	pushq	%r11			/* put it back on stack */
-	FIXUP_TOP_OF_STACK %r11, 8
 	DEFAULT_FRAME 0 8		/* offset 8: return address */
 	call sys_\func
-	RESTORE_TOP_OF_STACK %r11, 8
 	ret $REST_SKIP		/* pop extended registers */
 	CFI_ENDPROC
 END(stub_\func)
@@ -506,9 +484,7 @@ END(stub_\func)
 ENTRY(\label)
 	CFI_STARTPROC
 	PARTIAL_FRAME 0 8		/* offset 8: return address */
-	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
 	call \func
-	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
 	ret
 	CFI_ENDPROC
 END(\label)
@@ -524,7 +500,6 @@ ENTRY(stub_execve)
 	addq $8, %rsp
 	PARTIAL_FRAME 0
 	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
 	call sys_execve
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
@@ -537,9 +512,7 @@ ENTRY(stub_execveat)
 	addq $8, %rsp
 	PARTIAL_FRAME 0
 	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
 	call sys_execveat
-	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
@@ -555,7 +528,6 @@ ENTRY(stub_rt_sigreturn)
 	addq $8, %rsp
 	PARTIAL_FRAME 0
 	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
 	call sys_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
 	RESTORE_REST
@@ -569,7 +541,6 @@ ENTRY(stub_x32_rt_sigreturn)
 	addq $8, %rsp
 	PARTIAL_FRAME 0
 	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
 	call sys32_x32_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
 	RESTORE_REST
@@ -582,9 +553,7 @@ ENTRY(stub_x32_execve)
 	addq $8, %rsp
 	PARTIAL_FRAME 0
 	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execve
-	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
@@ -596,9 +565,7 @@ ENTRY(stub_x32_execveat)
 	addq $8, %rsp
 	PARTIAL_FRAME 0
 	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execveat
-	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
-- 
2.1.0


  parent reply	other threads:[~2015-01-18 11:45 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-18 11:45 [PATCHv2 0/4] x86, entry: some cleanup and simplification Alexander van Heukelum
2015-01-18 11:45 ` [PATCHv2 1/4] x86_64: cleanup THREAD_INFO(reg,offset) macro Alexander van Heukelum
2015-01-21 13:40   ` Denys Vlasenko
2015-01-21 16:20     ` Alexander van Heukelum
2015-01-21 18:04       ` Borislav Petkov
2015-01-21 18:48         ` Alexander van Heukelum
2015-01-18 11:45 ` [PATCHv2 2/4] x86_64: embrace KERNEL_STACK_OFFSET Alexander van Heukelum
2015-01-21 13:44   ` Denys Vlasenko
2015-01-21 16:29     ` Alexander van Heukelum
2015-01-23  0:53       ` Denys Vlasenko
2015-01-18 11:45 ` [PATCHv2 3/4] i386: clean up KERNEL_STACK_OFFSET Alexander van Heukelum
2015-01-18 11:45 ` Alexander van Heukelum [this message]
2015-01-18 16:38   ` [PATCHv2 4/4] x86_64, entry: Create IRET-compatible stack frame at syscall entry Andy Lutomirski
2015-01-18 17:22     ` Alexander van Heukelum
2015-01-18 12:05 ` [PATCHv2 0/4] x86, entry: some cleanup and simplification Borislav Petkov
2015-01-18 15:47   ` Alexander van Heukelum
2015-01-21 13:26     ` Denys Vlasenko
2015-01-21 15:51       ` Alexander van Heukelum

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1421581520-2816-5-git-send-email-heukelum@fastmail.fm \
    --to=heukelum@fastmail.fm \
    --cc=bp@suse.de \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=oleg@redhat.com \
    --cc=riel@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.