From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752583AbcEUQFW (ORCPT ); Sat, 21 May 2016 12:05:22 -0400 Received: from mail-yw0-f193.google.com ([209.85.161.193]:33922 "EHLO mail-yw0-f193.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752409AbcEUQFC (ORCPT ); Sat, 21 May 2016 12:05:02 -0400 From: Brian Gerst To: x86@kernel.org, linux-kernel@vger.kernel.org Cc: Ingo Molnar , "H. Peter Anvin" , Denys Vlasenko , Andy Lutomirski , Borislav Petkov , Thomas Gleixner Subject: [PATCH 4/4] x86: Pass kernel thread parameters in fork_frame Date: Sat, 21 May 2016 12:04:51 -0400 Message-Id: <1463846691-18498-5-git-send-email-brgerst@gmail.com> X-Mailer: git-send-email 2.5.5 In-Reply-To: <1463846691-18498-1-git-send-email-brgerst@gmail.com> References: <1463846691-18498-1-git-send-email-brgerst@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Instead of setting up a fake pt_regs context, put the kernel thread function pointer and arg into the unused callee-restored registers of struct fork_frame. Signed-off-by: Brian Gerst --- arch/x86/entry/entry_32.S | 28 +++++++++++----------------- arch/x86/entry/entry_64.S | 30 +++++++++++------------------- arch/x86/kernel/process_32.c | 15 ++++----------- arch/x86/kernel/process_64.c | 10 +++------- 4 files changed, 29 insertions(+), 54 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 05e5340..424e8d3 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -241,35 +241,29 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * eax: prev task we switched from + * ebx: kernel thread func + * edi: kernel thread arg */ ENTRY(ret_from_fork) pushl %eax call schedule_tail popl %eax + testl %ebx, %ebx + jnz 1f + +2: /* When we fork, we trace the syscall return in the child, too. */ movl %esp, %eax call syscall_return_slowpath jmp restore_all -END(ret_from_fork) -ENTRY(ret_from_kernel_thread) - pushl %eax - call schedule_tail - popl %eax - movl PT_EBP(%esp), %eax - call *PT_EBX(%esp) + /* kernel thread */ +1: movl %edi, %eax + call *%ebx movl %eax, PT_EAX(%esp) - - /* - * Kernel threads return to userspace as if returning from a syscall. - * We should check whether anything actually uses this path and, if so, - * consider switching it over to ret_from_fork. - */ - movl %esp, %eax - call syscall_return_slowpath - jmp restore_all -ENDPROC(ret_from_kernel_thread) + jmp 2b +END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 0542ad1..cc8a0ba 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -405,37 +405,29 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * rax: prev task we switched from + * rbx: kernel thread func + * r12: kernel thread arg */ ENTRY(ret_from_fork) movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ - testb $3, CS(%rsp) /* from kernel_thread? */ + testq %rbx, %rbx /* from kernel_thread? */ jnz 1f - /* - * We came from kernel_thread. This code path is quite twisted, and - * someone should clean it up. - * - * copy_thread_tls stashes the function pointer in RBX and the - * parameter to be passed in RBP. The called function is permitted - * to call do_execve and thereby jump to user mode. - */ - movq RBP(%rsp), %rdi - call *RBX(%rsp) - movq %rax, RAX(%rsp) - - /* - * Fall through as though we're exiting a syscall. This makes a - * twisted sort of sense if we just called do_execve. - */ - -1: +2: movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS jmp restore_regs_and_iret + +1: + /* kernel thread */ + movq %r12, %rdi + call *%rbx + movq %rax, RAX(%rsp) + jmp 2b END(ret_from_fork) /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0ba6fdf..bba563f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -138,6 +138,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, int err; frame->bp = 0; + frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) frame; p->thread.sp0 = (unsigned long) (childregs+1); memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -145,25 +146,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - frame->ret_addr = (unsigned long) ret_from_kernel_thread; - task_user_gs(p) = __KERNEL_STACK_CANARY; - childregs->ds = __USER_DS; - childregs->es = __USER_DS; - childregs->fs = __KERNEL_PERCPU; - childregs->bx = sp; /* function */ - childregs->bp = arg; - childregs->orig_ax = -1; - childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + frame->bx = sp; /* function */ + frame->di = arg; p->thread.io_bitmap_ptr = NULL; return 0; } + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0; if (sp) childregs->sp = sp; - frame->ret_addr = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); p->thread.io_bitmap_ptr = NULL; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9fab915..421646a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -163,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->sp = (unsigned long)childregs; - childregs->ss = __KERNEL_DS; - childregs->bx = sp; /* function */ - childregs->bp = arg; - childregs->orig_ax = -1; - childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + frame->bx = sp; /* function */ + frame->r12 = arg; return 0; } + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0; -- 2.5.5