From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755838AbcHXNKD (ORCPT ); Wed, 24 Aug 2016 09:10:03 -0400 Received: from terminus.zytor.com ([198.137.202.10]:49132 "EHLO terminus.zytor.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755141AbcHXNJ4 (ORCPT ); Wed, 24 Aug 2016 09:09:56 -0400 Date: Wed, 24 Aug 2016 06:09:00 -0700 From: tip-bot for Brian Gerst Message-ID: Cc: hpa@zytor.com, peterz@infradead.org, linux-kernel@vger.kernel.org, bp@alien8.de, dvlasenk@redhat.com, luto@kernel.org, mingo@kernel.org, tglx@linutronix.de, jpoimboe@redhat.com, torvalds@linux-foundation.org, brgerst@gmail.com Reply-To: luto@kernel.org, dvlasenk@redhat.com, tglx@linutronix.de, mingo@kernel.org, torvalds@linux-foundation.org, jpoimboe@redhat.com, brgerst@gmail.com, hpa@zytor.com, peterz@infradead.org, linux-kernel@vger.kernel.org, bp@alien8.de In-Reply-To: <1471106302-10159-6-git-send-email-brgerst@gmail.com> References: <1471106302-10159-6-git-send-email-brgerst@gmail.com> To: linux-tip-commits@vger.kernel.org Subject: [tip:x86/asm] sched/x86: Pass kernel thread parameters in 'struct fork_frame' Git-Commit-ID: 616d24835eeafa8ef3466479db028abfdfc77531 X-Mailer: tip-git-log-daemon Robot-ID: Robot-Unsubscribe: Contact to get blacklisted from these emails MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset=UTF-8 Content-Disposition: inline Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Commit-ID: 616d24835eeafa8ef3466479db028abfdfc77531 Gitweb: http://git.kernel.org/tip/616d24835eeafa8ef3466479db028abfdfc77531 Author: Brian Gerst AuthorDate: Sat, 13 Aug 2016 12:38:20 -0400 Committer: Ingo Molnar CommitDate: Wed, 24 Aug 2016 12:31:50 +0200 sched/x86: Pass kernel thread parameters in 'struct fork_frame' Instead of setting up a fake pt_regs context, put the kernel thread function pointer and arg into the unused callee-restored registers of 'struct fork_frame'. Signed-off-by: Brian Gerst Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1471106302-10159-6-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 31 +++++++++++++++---------------- arch/x86/entry/entry_64.S | 37 +++++++++++++++++-------------------- arch/x86/include/asm/switch_to.h | 2 ++ arch/x86/kernel/process_32.c | 18 ++++-------------- arch/x86/kernel/process_64.c | 12 +++--------- 5 files changed, 41 insertions(+), 59 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bf8f221..b75a8bc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -240,35 +240,34 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * eax: prev task we switched from + * ebx: kernel thread func (NULL for user thread) + * edi: kernel thread arg */ ENTRY(ret_from_fork) pushl %eax call schedule_tail popl %eax + testl %ebx, %ebx + jnz 1f /* kernel threads are uncommon */ + +2: /* When we fork, we trace the syscall return in the child, too. */ movl %esp, %eax call syscall_return_slowpath jmp restore_all -END(ret_from_fork) - -ENTRY(ret_from_kernel_thread) - pushl %eax - call schedule_tail - popl %eax - movl PT_EBP(%esp), %eax - call *PT_EBX(%esp) - movl $0, PT_EAX(%esp) + /* kernel thread */ +1: movl %edi, %eax + call *%ebx /* - * Kernel threads return to userspace as if returning from a syscall. - * We should check whether anything actually uses this path and, if so, - * consider switching it over to ret_from_fork. + * A kernel thread is allowed to return here after successfully + * calling do_execve(). Exit to userspace to complete the execve() + * syscall. */ - movl %esp, %eax - call syscall_return_slowpath - jmp restore_all -ENDPROC(ret_from_kernel_thread) + movl $0, PT_EAX(%esp) + jmp 2b +END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c1af8ac..c0373d6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -407,37 +407,34 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * rax: prev task we switched from + * rbx: kernel thread func (NULL for user thread) + * r12: kernel thread arg */ ENTRY(ret_from_fork) movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ - testb $3, CS(%rsp) /* from kernel_thread? */ - jnz 1f - - /* - * We came from kernel_thread. This code path is quite twisted, and - * someone should clean it up. - * - * copy_thread_tls stashes the function pointer in RBX and the - * parameter to be passed in RBP. The called function is permitted - * to call do_execve and thereby jump to user mode. - */ - movq RBP(%rsp), %rdi - call *RBX(%rsp) - movl $0, RAX(%rsp) - - /* - * Fall through as though we're exiting a syscall. This makes a - * twisted sort of sense if we just called do_execve. - */ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ -1: +2: movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS jmp restore_regs_and_iret + +1: + /* kernel thread */ + movq %r12, %rdi + call *%rbx + /* + * A kernel thread is allowed to return here after successfully + * calling do_execve(). Exit to userspace to complete the execve() + * syscall. + */ + movq $0, RAX(%rsp) + jmp 2b END(ret_from_fork) /* diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 886d5ea..5cb436a 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -34,6 +34,8 @@ static inline void prepare_switch_to(struct task_struct *prev, #endif } +asmlinkage void ret_from_fork(void); + /* data that is pointed to by thread.sp */ struct inactive_task_frame { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4bedbc0..18714a1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,9 +55,6 @@ #include #include -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); - /* * Return saved PC of a blocked thread. */ @@ -139,6 +136,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, int err; frame->bp = 0; + frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; p->thread.sp0 = (unsigned long) (childregs+1); memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -146,25 +144,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - frame->ret_addr = (unsigned long) ret_from_kernel_thread; - task_user_gs(p) = __KERNEL_STACK_CANARY; - childregs->ds = __USER_DS; - childregs->es = __USER_DS; - childregs->fs = __KERNEL_PERCPU; - childregs->bx = sp; /* function */ - childregs->bp = arg; - childregs->orig_ax = -1; - childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + frame->bx = sp; /* function */ + frame->di = arg; p->thread.io_bitmap_ptr = NULL; return 0; } + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0; if (sp) childregs->sp = sp; - frame->ret_addr = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); p->thread.io_bitmap_ptr = NULL; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 827eeed..b812cd0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -50,8 +50,6 @@ #include #include -asmlinkage extern void ret_from_fork(void); - __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); /* Prints also some state that isn't saved in the pt_regs */ @@ -165,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->sp = (unsigned long)childregs; - childregs->ss = __KERNEL_DS; - childregs->bx = sp; /* function */ - childregs->bp = arg; - childregs->orig_ax = -1; - childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + frame->bx = sp; /* function */ + frame->r12 = arg; return 0; } + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0;