From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752111AbcHFVtJ (ORCPT ); Sat, 6 Aug 2016 17:49:09 -0400 Received: from mail-wm0-f66.google.com ([74.125.82.66]:35820 "EHLO mail-wm0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751059AbcHFVtI (ORCPT ); Sat, 6 Aug 2016 17:49:08 -0400 Date: Sat, 6 Aug 2016 08:13:20 +0200 From: Ingo Molnar To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, Thomas Gleixner , "H. Peter Anvin" , Peter Zijlstra , Andrew Morton Subject: [GIT PULL] x86 fixes Message-ID: <20160806061320.GA23925@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.24 (2015-08-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Linus, Please pull the latest x86-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus # HEAD: f7d665627e103e82d34306c7d3f6f46f387c0d8b x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace Two fixes and a cleanup-fix, to the syscall entry code and to ptrace. Thanks, Ingo ------------------> Andy Lutomirski (2): x86/vdso: Error out if the vDSO isn't a valid DSO x86/ptrace: Stop setting TS_COMPAT in ptrace code David Howells (1): x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace arch/x86/entry/common.c | 6 +++++- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/vdso/vdso2c.h | 6 ++++++ arch/x86/include/asm/syscall.h | 5 +---- arch/x86/include/asm/thread_info.h | 3 +++ arch/x86/kernel/ptrace.c | 15 +++++++++------ arch/x86/kernel/signal.c | 26 ++++++++++++++++++++++++-- 7 files changed, 49 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 9e1e27d31c6d..be8c403f8117 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -270,8 +270,12 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * handling, because syscall restart has a fixup for compat * syscalls. The fixup is exercised by the ptrace_syscall_32 * selftest. + * + * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer + * special case only applies after poking regs and before the + * very next return to user mode. */ - ti->status &= ~TS_COMPAT; + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 4cddd17153fb..f848572169ea 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -294,7 +294,7 @@ # 285 sys_setaltroot 286 i386 add_key sys_add_key 287 i386 request_key sys_request_key -288 i386 keyctl sys_keyctl +288 i386 keyctl sys_keyctl compat_sys_keyctl 289 i386 ioprio_set sys_ioprio_set 290 i386 ioprio_get sys_ioprio_get 291 i386 inotify_init sys_inotify_init diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 63a03bb91497..4f741192846d 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -22,6 +22,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff)); + if (hdr->e_type != ET_DYN) + fail("input is not a shared object\n"); + /* Walk the segment table. */ for (i = 0; i < GET_LE(&hdr->e_phnum); i++) { if (GET_LE(&pt[i].p_type) == PT_LOAD) { @@ -49,6 +52,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, if (stripped_len < load_size) fail("stripped input is too short\n"); + if (!dyn) + fail("input has no PT_DYNAMIC section -- your toolchain is buggy\n"); + /* Walk the dynamic table */ for (i = 0; dyn + i < dyn_end && GET_LE(&dyn[i].d_tag) != DT_NULL; i++) { diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 999b7cd2e78c..4e23dd15c661 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task_thread_info(task)->status & TS_COMPAT) + if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -239,9 +239,6 @@ static inline int syscall_get_arch(void) * TS_COMPAT is set for 32-bit syscall entry and then * remains set until we return to user mode. * - * TIF_IA32 tasks should always have TS_COMPAT set at - * system call time. - * * x32 tasks should be considered AUDIT_ARCH_X86_64. */ if (task_thread_info(current)->status & TS_COMPAT) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89bff044a6f5..f856c590c309 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -219,6 +219,9 @@ static inline unsigned long current_stack_pointer(void) * have to worry about atomic accesses. */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +#ifdef CONFIG_COMPAT +#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ +#endif #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 600edd225e81..f79576a541ff 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -923,15 +923,18 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) case offsetof(struct user32, regs.orig_eax): /* - * A 32-bit debugger setting orig_eax means to restore - * the state of the task restarting a 32-bit syscall. - * Make sure we interpret the -ERESTART* codes correctly - * in case the task is not actually still sitting at the - * exit from a 32-bit syscall with TS_COMPAT still set. + * Warning: bizarre corner case fixup here. A 32-bit + * debugger setting orig_eax to -1 wants to disable + * syscall restart. Make sure that the syscall + * restart code sign-extends orig_ax. Also make sure + * we interpret the -ERESTART* codes correctly if + * loaded into regs->ax in case the task is not + * actually still sitting at the exit from a 32-bit + * syscall with TS_COMPAT still set. */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - task_thread_info(child)->status |= TS_COMPAT; + task_thread_info(child)->status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 22cc2f9f8aec..9747a6358949 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -760,8 +760,30 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { -#ifdef CONFIG_X86_64 - if (in_ia32_syscall()) + /* + * This function is fundamentally broken as currently + * implemented. + * + * The idea is that we want to trigger a call to the + * restart_block() syscall and that we want in_ia32_syscall(), + * in_x32_syscall(), etc. to match whatever they were in the + * syscall being restarted. We assume that the syscall + * instruction at (regs->ip - 2) matches whatever syscall + * instruction we used to enter in the first place. + * + * The problem is that we can get here when ptrace pokes + * syscall-like values into regs even if we're not in a syscall + * at all. + * + * For now, we maintain historical behavior and guess based on + * stored state. We could do better by saving the actual + * syscall arch in restart_block or (with caveats on x32) by + * checking if regs->ip points to 'int $0x80'. The current + * behavior is incorrect if a tracer has a different bitness + * than the tracee. + */ +#ifdef CONFIG_IA32_EMULATION + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI