From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S378745AbdEAKZj (ORCPT ); Mon, 1 May 2017 06:25:39 -0400 Received: from mail-wr0-f177.google.com ([209.85.128.177]:35593 "EHLO mail-wr0-f177.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S378730AbdEAKZc (ORCPT ); Mon, 1 May 2017 06:25:32 -0400 Date: Mon, 1 May 2017 12:25:27 +0200 From: Ingo Molnar To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, Thomas Gleixner , "H. Peter Anvin" , Peter Zijlstra , Andrew Morton Subject: [GIT PULL] x86/process changes for v4.12 Message-ID: <20170501102527.veaso6aldt5kdx7r@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: NeoMutt/20170113 (1.7.2) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Linus, Please pull the latest x86-process-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-process-for-linus # HEAD: f3e4559781bf0c393364baa6b772233088a711db x86/syscalls/32: Ignore arch_prctl for other architectures The main change in this cycle was to add the ARCH_[GET|SET]_CPUID prctl() ABI extension to control the availability of the CPUID instruction, analogously to the existing PR_GET|SET_TSC ABI that controls RDTSC. Motivation: the 'rr' user-space record-and-replay execution debugger would like to trap and emulate the CPUID instruction - which instruction is normally unprivileged. Trapping CPUID is possible on IvyBridge and later Intel CPUs - expose this hardware capability. out-of-topic modifications in x86-process-for-linus: ------------------------------------------------------ arch/um/include/shared/os.h # 17a6e1b8e8e8: x86/arch_prctl/64: Rename do # dd93938a92dc: x86/arch_prctl: Rename 'code fs/exec.c # e9ea1e7f53b8: x86/arch_prctl: Add ARCH_[GE include/linux/compat.h # 79170fda313e: x86/syscalls/32: Wire up arc include/linux/thread_info.h # e9ea1e7f53b8: x86/arch_prctl: Add ARCH_[GE scripts/checksyscalls.sh # f3e4559781bf: x86/syscalls/32: Ignore arch Thanks, Ingo ------------------> Arnd Bergmann (1): x86/syscalls/32: Ignore arch_prctl for other architectures Kyle Huey (11): x86/process: Optimize TIF checks in __switch_to_xtra() x86/process: Correct and optimize TIF_BLOCKSTEP switch x86/msr: Rename MISC_FEATURE_ENABLES to MISC_FEATURES_ENABLES x86/arch_prctl: Rename 'code' argument to 'option' x86/arch_prctl/64: Use SYSCALL_DEFINE2 to define sys_arch_prctl() x86/arch_prctl/64: Rename do_arch_prctl() to do_arch_prctl_64() x86/arch_prctl: Add do_arch_prctl_common() x86/syscalls/32: Wire up arch_prctl on x86-32 x86/cpufeature: Detect CPUID faulting support x86/arch_prctl: Add ARCH_[GET|SET]_CPUID um/arch_prctl: Fix fallout from x86 arch_prctl() rework Thomas Gleixner (1): x86/process: Optimize TIF_NOTSC switch arch/um/include/shared/os.h | 4 +- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 11 ++- arch/x86/include/asm/processor.h | 2 + arch/x86/include/asm/proto.h | 4 +- arch/x86/include/asm/thread_info.h | 6 +- arch/x86/include/asm/tlbflush.h | 10 +++ arch/x86/include/uapi/asm/prctl.h | 11 ++- arch/x86/kernel/cpu/intel.c | 40 +++++++-- arch/x86/kernel/process.c | 151 +++++++++++++++++++++++++-------- arch/x86/kernel/process_32.c | 7 ++ arch/x86/kernel/process_64.c | 48 +++++++---- arch/x86/kernel/ptrace.c | 8 +- arch/x86/um/Makefile | 2 +- arch/x86/um/asm/ptrace.h | 2 +- arch/x86/um/os-Linux/prctl.c | 4 +- arch/x86/um/syscalls_32.c | 7 ++ arch/x86/um/syscalls_64.c | 20 +++-- fs/exec.c | 1 + include/linux/compat.h | 2 + include/linux/thread_info.h | 4 + scripts/checksyscalls.sh | 1 + 23 files changed, 259 insertions(+), 88 deletions(-) create mode 100644 arch/x86/um/syscalls_32.c diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index de5d572225f3..cd1fa97776c3 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd); extern void maybe_sigio_broken(int fd, int read); extern void sigio_broken(int fd, int read); -/* sys-x86_64/prctl.c */ -extern int os_arch_prctl(int pid, int code, unsigned long *addr); +/* prctl.c */ +extern int os_arch_prctl(int pid, int option, unsigned long *arg2); /* tty.c */ extern int get_pty(void); diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 9ba050fe47f3..0af59fa789ea 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -390,3 +390,4 @@ 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free 383 i386 statx sys_statx +384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b04bb6dfed7f..0fe00446f9ca 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -187,6 +187,7 @@ * Reuse free bits when adding new feature flags! */ #define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d8b5f8ab8ef9..673f9ac50f6d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -45,6 +45,8 @@ #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd #define MSR_PLATFORM_INFO 0x000000ce +#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 +#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) @@ -127,6 +129,7 @@ /* DEBUGCTLMSR bits (others vary by model): */ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) @@ -552,10 +555,12 @@ #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) -/* MISC_FEATURE_ENABLES non-architectural features */ -#define MSR_MISC_FEATURE_ENABLES 0x00000140 +/* MISC_FEATURES_ENABLES non-architectural features */ +#define MSR_MISC_FEATURES_ENABLES 0x00000140 -#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1 +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) +#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 #define MSR_IA32_TSC_DEADLINE 0x000006E0 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f385eca5407a..a80c1b3997ed 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); +DECLARE_PER_CPU(u64, msr_misc_features_shadow); + /* Register/unregister a process' MPX related resource */ #define MPX_ENABLE_MANAGEMENT() mpx_enable_management() #define MPX_DISABLE_MANAGEMENT() mpx_disable_management() diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 9b9b30b19441..8d3964fc5f91 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -9,6 +9,7 @@ void syscall_init(void); #ifdef CONFIG_X86_64 void entry_SYSCALL_64(void); +long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); #endif #ifdef CONFIG_X86_32 @@ -30,6 +31,7 @@ void x86_report_nx(void); extern int reboot_force; -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); +long do_arch_prctl_common(struct task_struct *task, int option, + unsigned long cpuid_enabled); #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6f5eb07a95..9fc44b95f7cb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -87,6 +87,7 @@ struct thread_info { #define TIF_SECCOMP 8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_NOHZ 19 /* in adaptive nohz mode */ @@ -110,6 +111,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_NOHZ (1 << TIF_NOHZ) @@ -138,7 +140,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) @@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack, extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); +extern void arch_setup_new_exec(void); +#define arch_setup_new_exec arch_setup_new_exec #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..ff4923a19f79 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask) } } +static inline void cr4_toggle_bits(unsigned long mask) +{ + unsigned long cr4; + + cr4 = this_cpu_read(cpu_tlbstate.cr4); + cr4 ^= mask; + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); +} + /* Read the CR4 shadow. */ static inline unsigned long cr4_read_shadow(void) { diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 835aa51c7f6e..c45765517092 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -1,10 +1,13 @@ #ifndef _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 + +#define ARCH_GET_CPUID 0x1011 +#define ARCH_SET_CPUID 0x1012 #define ARCH_MAP_VDSO_X32 0x2001 #define ARCH_MAP_VDSO_32 0x2002 diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 063197771b8d..dfa90a3a5145 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) return; } - if (ring3mwait_disabled) { - msr_clear_bit(MSR_MISC_FEATURE_ENABLES, - MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT); + if (ring3mwait_disabled) return; - } - - msr_set_bit(MSR_MISC_FEATURE_ENABLES, - MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT); set_cpu_cap(c, X86_FEATURE_RING3MWAIT); + this_cpu_or(msr_misc_features_shadow, + 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT); if (c == &boot_cpu_data) ELF_HWCAP2 |= HWCAP2_RING3MWAIT; @@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c) init_intel_energy_perf(c); } +static void init_cpuid_fault(struct cpuinfo_x86 *c) +{ + u64 msr; + + if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) { + if (msr & MSR_PLATFORM_INFO_CPUID_FAULT) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); + } +} + +static void init_intel_misc_features(struct cpuinfo_x86 *c) +{ + u64 msr; + + if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr)) + return; + + /* Clear all MISC features */ + this_cpu_write(msr_misc_features_shadow, 0); + + /* Check features and update capabilities and shadow control bits */ + init_cpuid_fault(c); + probe_xeon_phi_r3mwait(c); + + msr = this_cpu_read(msr_misc_features_shadow); + wrmsrl(MSR_MISC_FEATURES_ENABLES, msr); +} + static void init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c) init_intel_energy_perf(c); - probe_xeon_phi_r3mwait(c); + init_intel_misc_features(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f67591561711..0bb88428cbf2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -37,6 +37,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -124,11 +125,6 @@ void flush_thread(void) fpu__clear(&tsk->thread.fpu); } -static void hard_disable_TSC(void) -{ - cr4_set_bits(X86_CR4_TSD); -} - void disable_TSC(void) { preempt_disable(); @@ -137,15 +133,10 @@ void disable_TSC(void) * Must flip the CPU state synchronously with * TIF_NOTSC in the current running context. */ - hard_disable_TSC(); + cr4_set_bits(X86_CR4_TSD); preempt_enable(); } -static void hard_enable_TSC(void) -{ - cr4_clear_bits(X86_CR4_TSD); -} - static void enable_TSC(void) { preempt_disable(); @@ -154,7 +145,7 @@ static void enable_TSC(void) * Must flip the CPU state synchronously with * TIF_NOTSC in the current running context. */ - hard_enable_TSC(); + cr4_clear_bits(X86_CR4_TSD); preempt_enable(); } @@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val) return 0; } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) -{ - struct thread_struct *prev, *next; - - prev = &prev_p->thread; - next = &next_p->thread; +DEFINE_PER_CPU(u64, msr_misc_features_shadow); - if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ - test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); +static void set_cpuid_faulting(bool on) +{ + u64 msrval; - debugctl &= ~DEBUGCTLMSR_BTF; - if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) - debugctl |= DEBUGCTLMSR_BTF; + msrval = this_cpu_read(msr_misc_features_shadow); + msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; + msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); + this_cpu_write(msr_misc_features_shadow, msrval); + wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval); +} - update_debugctlmsr(debugctl); +static void disable_cpuid(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) { + /* + * Must flip the CPU state synchronously with + * TIF_NOCPUID in the current running context. + */ + set_cpuid_faulting(true); } + preempt_enable(); +} - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ - test_tsk_thread_flag(next_p, TIF_NOTSC)) { - /* prev and next are different */ - if (test_tsk_thread_flag(next_p, TIF_NOTSC)) - hard_disable_TSC(); - else - hard_enable_TSC(); +static void enable_cpuid(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOCPUID)) { + /* + * Must flip the CPU state synchronously with + * TIF_NOCPUID in the current running context. + */ + set_cpuid_faulting(false); } + preempt_enable(); +} + +static int get_cpuid_mode(void) +{ + return !test_thread_flag(TIF_NOCPUID); +} + +static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled) +{ + if (!static_cpu_has(X86_FEATURE_CPUID_FAULT)) + return -ENODEV; + + if (cpuid_enabled) + enable_cpuid(); + else + disable_cpuid(); + + return 0; +} + +/* + * Called immediately after a successful exec. + */ +void arch_setup_new_exec(void) +{ + /* If cpuid was previously disabled for this task, re-enable it. */ + if (test_thread_flag(TIF_NOCPUID)) + enable_cpuid(); +} - if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { +static inline void switch_to_bitmap(struct tss_struct *tss, + struct thread_struct *prev, + struct thread_struct *next, + unsigned long tifp, unsigned long tifn) +{ + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. * Normally this is 128 bytes or less: */ memcpy(tss->io_bitmap, next->io_bitmap_ptr, max(prev->io_bitmap_max, next->io_bitmap_max)); - /* * Make sure that the TSS limit is correct for the CPU * to notice the IO bitmap. */ refresh_tss_limit(); - } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { + } else if (tifp & _TIF_IO_BITMAP) { /* * Clear any possible leftover bits: */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } +} + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + unsigned long tifp, tifn; + + prev = &prev_p->thread; + next = &next_p->thread; + + tifn = READ_ONCE(task_thread_info(next_p)->flags); + tifp = READ_ONCE(task_thread_info(prev_p)->flags); + switch_to_bitmap(tss, prev, next, tifp, tifn); + propagate_user_return_notify(prev_p, next_p); + + if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && + arch_has_block_step()) { + unsigned long debugctl, msk; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl &= ~DEBUGCTLMSR_BTF; + msk = tifn & _TIF_BLOCKSTEP; + debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + } + + if ((tifp ^ tifn) & _TIF_NOTSC) + cr4_toggle_bits(X86_CR4_TSD); + + if ((tifp ^ tifn) & _TIF_NOCPUID) + set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); } /* @@ -550,3 +616,16 @@ unsigned long get_wchan(struct task_struct *p) put_task_stack(p); return ret; } + +long do_arch_prctl_common(struct task_struct *task, int option, + unsigned long cpuid_enabled) +{ + switch (option) { + case ARCH_GET_CPUID: + return get_cpuid_mode(); + case ARCH_SET_CPUID: + return set_cpuid_mode(task, cpuid_enabled); + } + + return -EINVAL; +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4c818f8bc135..ff40e74c9181 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include #include +#include void __show_regs(struct pt_regs *regs, int all) { @@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } + +SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) +{ + return do_arch_prctl_common(current, option, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d6b784a5520d..ea1a6180bf39 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, (struct user_desc __user *)tls, 0); else #endif - err = do_arch_prctl(p, ARCH_SET_FS, tls); + err = do_arch_prctl_64(p, ARCH_SET_FS, tls); if (err) goto out; } @@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) } #endif -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) +long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) { int ret = 0; int doit = task == current; int cpu; - switch (code) { + switch (option) { case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.gsindex = 0; - task->thread.gsbase = addr; + task->thread.gsbase = arg2; if (doit) { load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); } put_cpu(); break; case ARCH_SET_FS: /* Not strictly needed for fs, but do it for symmetry with gs */ - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.fsindex = 0; - task->thread.fsbase = addr; + task->thread.fsbase = arg2; if (doit) { /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); - ret = wrmsrl_safe(MSR_FS_BASE, addr); + ret = wrmsrl_safe(MSR_FS_BASE, arg2); } put_cpu(); break; case ARCH_GET_FS: { unsigned long base; + if (doit) rdmsrl(MSR_FS_BASE, base); else base = task->thread.fsbase; - ret = put_user(base, (unsigned long __user *)addr); + ret = put_user(base, (unsigned long __user *)arg2); break; } case ARCH_GET_GS: { unsigned long base; + if (doit) rdmsrl(MSR_KERNEL_GS_BASE, base); else base = task->thread.gsbase; - ret = put_user(base, (unsigned long __user *)addr); + ret = put_user(base, (unsigned long __user *)arg2); break; } #ifdef CONFIG_CHECKPOINT_RESTORE # ifdef CONFIG_X86_X32_ABI case ARCH_MAP_VDSO_X32: - return prctl_map_vdso(&vdso_image_x32, addr); + return prctl_map_vdso(&vdso_image_x32, arg2); # endif # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION case ARCH_MAP_VDSO_32: - return prctl_map_vdso(&vdso_image_32, addr); + return prctl_map_vdso(&vdso_image_32, arg2); # endif case ARCH_MAP_VDSO_64: - return prctl_map_vdso(&vdso_image_64, addr); + return prctl_map_vdso(&vdso_image_64, arg2); #endif default: @@ -621,10 +624,23 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) return ret; } -long sys_arch_prctl(int code, unsigned long addr) +SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) +{ + long ret; + + ret = do_arch_prctl_64(current, option, arg2); + if (ret == -EINVAL) + ret = do_arch_prctl_common(current, option, arg2); + + return ret; +} + +#ifdef CONFIG_IA32_EMULATION +COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) { - return do_arch_prctl(current, code, addr); + return do_arch_prctl_common(current, option, arg2); } +#endif unsigned long KSTK_ESP(struct task_struct *task) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2364b23ea3e5..f37d18124648 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -396,12 +396,12 @@ static int putreg(struct task_struct *child, if (value >= TASK_SIZE_MAX) return -EIO; /* - * When changing the segment base, use do_arch_prctl + * When changing the segment base, use do_arch_prctl_64 * to set either thread.fs or thread.fsindex and the * corresponding GDT slot. */ if (child->thread.fsbase != value) - return do_arch_prctl(child, ARCH_SET_FS, value); + return do_arch_prctl_64(child, ARCH_SET_FS, value); return 0; case offsetof(struct user_regs_struct,gs_base): /* @@ -410,7 +410,7 @@ static int putreg(struct task_struct *child, if (value >= TASK_SIZE_MAX) return -EIO; if (child->thread.gsbase != value) - return do_arch_prctl(child, ARCH_SET_GS, value); + return do_arch_prctl_64(child, ARCH_SET_GS, value); return 0; #endif } @@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request, Works just like arch_prctl, except that the arguments are reversed. */ case PTRACE_ARCH_PRCTL: - ret = do_arch_prctl(child, data, addr); + ret = do_arch_prctl_64(child, data, addr); break; #endif diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index e7e7055a8658..69f0827d5f53 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \ ifeq ($(CONFIG_X86_32),y) -obj-y += checksum_32.o +obj-y += checksum_32.o syscalls_32.o obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index e59eef20647b..b291ca5cf66b 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h @@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx, return -ENOSYS; } -extern long arch_prctl(struct task_struct *task, int code, +extern long arch_prctl(struct task_struct *task, int option, unsigned long __user *addr); #endif diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c index 96eb2bd28832..8431e87ac333 100644 --- a/arch/x86/um/os-Linux/prctl.c +++ b/arch/x86/um/os-Linux/prctl.c @@ -6,7 +6,7 @@ #include #include -int os_arch_prctl(int pid, int code, unsigned long *addr) +int os_arch_prctl(int pid, int option, unsigned long *arg2) { - return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code); + return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option); } diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c new file mode 100644 index 000000000000..627d68836b16 --- /dev/null +++ b/arch/x86/um/syscalls_32.c @@ -0,0 +1,7 @@ +#include +#include + +SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) +{ + return -EINVAL; +} diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index 10d907098c26..58f51667e2e4 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -7,13 +7,15 @@ #include #include +#include #include #include /* XXX This should get the constants from libc */ #include -long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) +long arch_prctl(struct task_struct *task, int option, + unsigned long __user *arg2) { - unsigned long *ptr = addr, tmp; + unsigned long *ptr = arg2, tmp; long ret; int pid = task->mm->context.id.u.pid; @@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) * arch_prctl is run on the host, then the registers are read * back. */ - switch (code) { + switch (option) { case ARCH_SET_FS: case ARCH_SET_GS: ret = restore_registers(pid, ¤t->thread.regs.regs); @@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) ptr = &tmp; } - ret = os_arch_prctl(pid, code, ptr); + ret = os_arch_prctl(pid, option, ptr); if (ret) return ret; - switch (code) { + switch (option) { case ARCH_SET_FS: current->thread.arch.fs = (unsigned long) ptr; ret = save_registers(pid, ¤t->thread.regs.regs); @@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) ret = save_registers(pid, ¤t->thread.regs.regs); break; case ARCH_GET_FS: - ret = put_user(tmp, addr); + ret = put_user(tmp, arg2); break; case ARCH_GET_GS: - ret = put_user(tmp, addr); + ret = put_user(tmp, arg2); break; } return ret; } -long sys_arch_prctl(int code, unsigned long addr) +SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) { - return arch_prctl(current, code, (unsigned long __user *) addr); + return arch_prctl(current, option, (unsigned long __user *) arg2); } void arch_switch_to(struct task_struct *to) diff --git a/fs/exec.c b/fs/exec.c index 65145a3df065..72934df68471 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); + arch_setup_new_exec(); perf_event_exec(); __set_task_comm(current, kbasename(bprm->filename), true); diff --git a/include/linux/compat.h b/include/linux/compat.h index aef47be2a5c1..af9dbc44fd92 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); +asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); + /* * For most but not all architectures, "am I in a compat syscall?" and * "am I a compat task?" are the same question. For architectures on which diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 58373875e8ee..55125d674338 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n, { } #endif /* CONFIG_HARDENED_USERCOPY */ +#ifndef arch_setup_new_exec +static inline void arch_setup_new_exec(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index 2c9082ba6137..116b7735ee9f 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -148,6 +148,7 @@ cat << EOF #define __IGNORE_sysfs #define __IGNORE_uselib #define __IGNORE__sysctl +#define __IGNORE_arch_prctl /* ... including the "new" 32-bit uid syscalls */ #define __IGNORE_lchown32