From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755323AbcFTPS3 (ORCPT ); Mon, 20 Jun 2016 11:18:29 -0400 Received: from mail-ob0-f196.google.com ([209.85.214.196]:35087 "EHLO mail-ob0-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751881AbcFTPRg (ORCPT ); Mon, 20 Jun 2016 11:17:36 -0400 MIME-Version: 1.0 In-Reply-To: <20160620135158.GE22065@nazgul.tnic> References: <1466283378-17062-1-git-send-email-brgerst@gmail.com> <1466283378-17062-6-git-send-email-brgerst@gmail.com> <20160620135158.GE22065@nazgul.tnic> From: Brian Gerst Date: Mon, 20 Jun 2016 11:01:02 -0400 Message-ID: Subject: Re: [PATCH v2 5/6] x86: Pass kernel thread parameters in fork_frame To: Borislav Petkov Cc: "the arch/x86 maintainers" , Linux Kernel Mailing List , Ingo Molnar , "H. Peter Anvin" , Denys Vlasenko , Andy Lutomirski , Thomas Gleixner Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Mon, Jun 20, 2016 at 9:51 AM, Borislav Petkov wrote: > On Sat, Jun 18, 2016 at 04:56:17PM -0400, Brian Gerst wrote: >> Instead of setting up a fake pt_regs context, put the kernel thread >> function pointer and arg into the unused callee-restored registers >> of struct fork_frame. >> >> Signed-off-by: Brian Gerst > >> --- >> arch/x86/entry/entry_32.S | 31 +++++++++++++++---------------- >> arch/x86/entry/entry_64.S | 35 ++++++++++++++++------------------- >> arch/x86/kernel/process_32.c | 14 ++++---------- >> arch/x86/kernel/process_64.c | 10 +++------- >> 4 files changed, 38 insertions(+), 52 deletions(-) > > ... > >> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S >> index 7574528..23e764c 100644 >> --- a/arch/x86/entry/entry_64.S >> +++ b/arch/x86/entry/entry_64.S >> @@ -404,37 +404,34 @@ END(__switch_to_asm) >> * A newly forked process directly context switches into this address. >> * >> * rax: prev task we switched from >> + * rbx: kernel thread func >> + * r12: kernel thread arg >> */ >> ENTRY(ret_from_fork) >> movq %rax, %rdi >> call schedule_tail /* rdi: 'prev' task parameter */ >> >> - testb $3, CS(%rsp) /* from kernel_thread? */ >> + testq %rbx, %rbx /* from kernel_thread? */ >> jnz 1f >> >> - /* >> - * We came from kernel_thread. This code path is quite twisted, and >> - * someone should clean it up. >> - * >> - * copy_thread_tls stashes the function pointer in RBX and the >> - * parameter to be passed in RBP. The called function is permitted >> - * to call do_execve and thereby jump to user mode. >> - */ >> - movq RBP(%rsp), %rdi >> - call *RBX(%rsp) >> - movl $0, RAX(%rsp) >> - >> - /* >> - * Fall through as though we're exiting a syscall. This makes a >> - * twisted sort of sense if we just called do_execve. >> - */ >> - >> -1: >> +2: >> movq %rsp, %rdi >> call syscall_return_slowpath /* returns with IRQs disabled */ >> TRACE_IRQS_ON /* user mode is traced as IRQS on */ >> SWAPGS >> jmp restore_regs_and_iret >> + >> +1: >> + /* kernel thread */ >> + movq %r12, %rdi >> + call *%rbx >> + /* >> + * A kernel thread is allowed to return here after successfully >> + * calling do_execve(). Exit to userspace to complete the execve() >> + * syscall. >> + */ >> + movq $0, RAX(%rsp) >> + jmp 2b >> END(ret_from_fork) > > Wouldn't it be simpler to reverse the JNZ after the TESTB above and thus > have a single label and simpler code flow (pasting the whole thing here > because a diff is unreadable): > > ENTRY(ret_from_fork) > movq %rax, %rdi > call schedule_tail /* rdi: 'prev' task parameter */ > > testq %rbx, %rbx /* from kernel_thread? */ > jz 1f > > /* kernel thread */ > movq %r12, %rdi > call *%rbx > /* > * A kernel thread is allowed to return here after successfully > * calling do_execve(). Exit to userspace to complete the execve() > * syscall. > */ > movq $0, RAX(%rsp) > > 1: > movq %rsp, %rdi > call syscall_return_slowpath /* returns with IRQs disabled */ > TRACE_IRQS_ON /* user mode is traced as IRQS on */ > SWAPGS > jmp restore_regs_and_iret > END(ret_from_fork) > > It boots fine in my guest this way too. The idea was to put the uncommon case (kernel thread) out of line for performance reasons. -- Brian Gerst