From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752555AbcEUQFS (ORCPT ); Sat, 21 May 2016 12:05:18 -0400 Received: from mail-yw0-f194.google.com ([209.85.161.194]:36476 "EHLO mail-yw0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752405AbcEUQFB (ORCPT ); Sat, 21 May 2016 12:05:01 -0400 From: Brian Gerst To: x86@kernel.org, linux-kernel@vger.kernel.org Cc: Ingo Molnar , "H. Peter Anvin" , Denys Vlasenko , Andy Lutomirski , Borislav Petkov , Thomas Gleixner Subject: [PATCH 3/4] x86: Rewrite switch_to() code Date: Sat, 21 May 2016 12:04:50 -0400 Message-Id: <1463846691-18498-4-git-send-email-brgerst@gmail.com> X-Mailer: git-send-email 2.5.5 In-Reply-To: <1463846691-18498-1-git-send-email-brgerst@gmail.com> References: <1463846691-18498-1-git-send-email-brgerst@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Move the low-level context switch code to an out-of-line asm stub instead of using complex inline asm. This allows constructing a new stack frame for the child process to make it seamlessly flow to ret_from_fork without an extra test and branch in __switch_to(). It also improves code generation for __schedule() by using the C calling convention instead of clobbering all registers. Signed-off-by: Brian Gerst --- arch/x86/entry/entry_32.S | 38 ++++++++++ arch/x86/entry/entry_64.S | 42 +++++++++++- arch/x86/include/asm/processor.h | 3 - arch/x86/include/asm/switch_to.h | 137 ++++++------------------------------- arch/x86/include/asm/thread_info.h | 2 - arch/x86/kernel/asm-offsets.c | 6 ++ arch/x86/kernel/asm-offsets_32.c | 5 ++ arch/x86/kernel/asm-offsets_64.c | 5 ++ arch/x86/kernel/process_32.c | 8 ++- arch/x86/kernel/process_64.c | 7 +- arch/x86/kernel/smpboot.c | 1 - 11 files changed, 124 insertions(+), 130 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ee6fea0..05e5340 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -204,6 +204,44 @@ POP_GS_EX .endm +/* + * %eax: prev task + * %edx: next task + */ +ENTRY(__switch_to_asm) + /* + * Save callee-saved registers + * This must match the order in struct fork_frame + * Frame pointer must be last for get_wchan + */ + pushl %ebx + pushl %edi + pushl %esi + pushl %ebp + + /* switch stack */ + movl %esp, TASK_threadsp(%eax) + movl TASK_threadsp(%edx), %esp + +#ifdef CONFIG_CC_STACKPROTECTOR + movl TASK_stack_canary(%edx), %ebx + movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset +#endif + + /* restore callee-saved registers */ + popl %ebp + popl %esi + popl %edi + popl %ebx + + jmp __switch_to +END(__switch_to_asm) + +/* + * A newly forked process directly context switches into this address. + * + * eax: prev task we switched from + */ ENTRY(ret_from_fork) pushl %eax call schedule_tail diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ab9f8c8..0542ad1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -365,13 +365,49 @@ END(ptregs_\func) #include /* + * %rdi: prev task + * %rsi: next task + */ +ENTRY(__switch_to_asm) + /* + * Save callee-saved registers + * This must match the order in struct fork_frame + * Frame pointer must be last for get_wchan + */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + + /* switch stack */ + movq %rsp, TASK_threadsp(%rdi) + movq TASK_threadsp(%rsi), %rsp + +#ifdef CONFIG_CC_STACKPROTECTOR + movq TASK_stack_canary(%rsi), %rbx + movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset +#endif + + /* restore callee-saved registers */ + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + + jmp __switch_to +END(__switch_to_asm) + +/* * A newly forked process directly context switches into this address. * - * rdi: prev task we switched from + * rax: prev task we switched from */ ENTRY(ret_from_fork) - LOCK ; btr $TIF_FORK, TI_flags(%r8) - + movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ testb $3, CS(%rsp) /* from kernel_thread? */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 62c6cc3..d3c2598 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -384,9 +384,6 @@ struct thread_struct { unsigned short fsindex; unsigned short gsindex; #endif -#ifdef CONFIG_X86_32 - unsigned long ip; -#endif #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8f321a1..b6c9e0c 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -2,130 +2,35 @@ #define _ASM_X86_SWITCH_TO_H struct task_struct; /* one of the stranger aspects of C forward declarations */ + +struct task_struct *__switch_to_asm(struct task_struct *prev, + struct task_struct *next); + __visible struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next); + struct task_struct *next); struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); -#ifdef CONFIG_X86_32 - -#ifdef CONFIG_CC_STACKPROTECTOR -#define __switch_canary \ - "movl %P[task_canary](%[next]), %%ebx\n\t" \ - "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" -#define __switch_canary_oparam \ - , [stack_canary] "=m" (stack_canary.canary) -#define __switch_canary_iparam \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) -#else /* CC_STACKPROTECTOR */ -#define __switch_canary -#define __switch_canary_oparam -#define __switch_canary_iparam -#endif /* CC_STACKPROTECTOR */ +struct fork_frame { + unsigned long bp; +#ifdef CONFIG_X86_64 + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; +#else + unsigned long si; + unsigned long di; +#endif + unsigned long bx; + unsigned long ret_addr; + struct pt_regs regs; +}; -/* - * Saving eflags is important. It switches not only IOPL between tasks, - * it also protects other tasks from NT leaking through sysenter etc. - */ #define switch_to(prev, next, last) \ do { \ - /* \ - * Context-switching clobbers all registers, so we clobber \ - * them explicitly, via unused output variables. \ - * (EAX and EBP is not listed because EBP is saved/restored \ - * explicitly for wchan access and EAX is the return value of \ - * __switch_to()) \ - */ \ - unsigned long ebx, ecx, edx, esi, edi; \ - \ - asm volatile("pushl %%ebp\n\t" /* save EBP */ \ - "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ - "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ - "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ - "pushl %[next_ip]\n\t" /* restore EIP */ \ - __switch_canary \ - "jmp __switch_to\n" /* regparm call */ \ - "1:\t" \ - "popl %%ebp\n\t" /* restore EBP */ \ - \ - /* output parameters */ \ - : [prev_sp] "=m" (prev->thread.sp), \ - [prev_ip] "=m" (prev->thread.ip), \ - "=a" (last), \ - \ - /* clobbered output registers: */ \ - "=b" (ebx), "=c" (ecx), "=d" (edx), \ - "=S" (esi), "=D" (edi) \ - \ - __switch_canary_oparam \ - \ - /* input parameters: */ \ - : [next_sp] "m" (next->thread.sp), \ - [next_ip] "m" (next->thread.ip), \ - \ - /* regparm parameters for __switch_to(): */ \ - [prev] "a" (prev), \ - [next] "d" (next) \ - \ - __switch_canary_iparam \ - \ - : /* reloaded segment registers */ \ - "memory"); \ + ((last) = __switch_to_asm((prev), (next))); \ } while (0) -#else /* CONFIG_X86_32 */ - -/* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t" - -#define __EXTRA_CLOBBER \ - , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ - "r12", "r13", "r14", "r15", "flags" - -#ifdef CONFIG_CC_STACKPROTECTOR -#define __switch_canary \ - "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,"__percpu_arg([gs_canary])"\n\t" -#define __switch_canary_oparam \ - , [gs_canary] "=m" (irq_stack_union.stack_canary) -#define __switch_canary_iparam \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) -#else /* CC_STACKPROTECTOR */ -#define __switch_canary -#define __switch_canary_oparam -#define __switch_canary_iparam -#endif /* CC_STACKPROTECTOR */ - -/* - * There is no need to save or restore flags, because flags are always - * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL - * has no effect. - */ -#define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ - "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ - "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ - "call __switch_to\n\t" \ - "movq "__percpu_arg([current_task])",%%rsi\n\t" \ - __switch_canary \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ - "movq %%rax,%%rdi\n\t" \ - "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ - "jnz ret_from_fork\n\t" \ - RESTORE_CONTEXT \ - : "=a" (last) \ - __switch_canary_oparam \ - : [next] "S" (next), [prev] "D" (prev), \ - [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ - [ti_flags] "i" (offsetof(struct thread_info, flags)), \ - [_tif_fork] "i" (_TIF_FORK), \ - [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [current_task] "m" (current_task) \ - __switch_canary_iparam \ - : "memory", "cc" __EXTRA_CLOBBER) - -#endif /* CONFIG_X86_32 */ - #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30c133a..20d56ec 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -99,7 +99,6 @@ struct thread_info { #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ -#define TIF_FORK 18 /* ret_from_fork */ #define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ @@ -123,7 +122,6 @@ struct thread_info { #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_FORK (1 << TIF_FORK) #define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 674134e..ec41c79 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -29,6 +29,12 @@ void common(void) { BLANK(); + OFFSET(TASK_threadsp, task_struct, thread.sp); +#ifdef CONFIG_CC_STACKPROTECTOR + OFFSET(TASK_stack_canary, task_struct, stack_canary); +#endif + + BLANK(); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ecdc1d2..880aa09 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -57,6 +57,11 @@ void foo(void) /* Size of SYSENTER_stack */ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); +#ifdef CONFIG_CC_STACKPROTECTOR + BLANK(); + OFFSET(stack_canary_offset, stack_canary, canary); +#endif + #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d875f97..210927e 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -56,6 +56,11 @@ int main(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); BLANK(); +#ifdef CONFIG_CC_STACKPROTECTOR + DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary)); + BLANK(); +#endif + DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(NR_syscalls, sizeof(syscalls_64)); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9f95091..0ba6fdf 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -133,17 +133,19 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, unsigned long arg, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); + struct fork_frame *frame = container_of(childregs, struct fork_frame, regs); struct task_struct *tsk; int err; - p->thread.sp = (unsigned long) childregs; + frame->bp = 0; + p->thread.sp = (unsigned long) frame; p->thread.sp0 = (unsigned long) (childregs+1); memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - p->thread.ip = (unsigned long) ret_from_kernel_thread; + frame->ret_addr = (unsigned long) ret_from_kernel_thread; task_user_gs(p) = __KERNEL_STACK_CANARY; childregs->ds = __USER_DS; childregs->es = __USER_DS; @@ -161,7 +163,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (sp) childregs->sp = sp; - p->thread.ip = (unsigned long) ret_from_fork; + frame->ret_addr = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); p->thread.io_bitmap_ptr = NULL; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6e789ca..9fab915 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -142,11 +142,14 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, int err; struct pt_regs *childregs; struct task_struct *me = current; + struct fork_frame *frame; p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; childregs = task_pt_regs(p); - p->thread.sp = (unsigned long) childregs; - set_tsk_thread_flag(p, TIF_FORK); + frame = container_of(childregs, struct fork_frame, regs); + frame->bp = 0; + frame->ret_addr = (unsigned long) ret_from_fork; + p->thread.sp = (unsigned long) frame; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b9..8feb392 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -928,7 +928,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) per_cpu(cpu_current_top_of_stack, cpu) = (unsigned long)task_stack_page(idle) + THREAD_SIZE; #else - clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif } -- 2.5.5