From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752849AbcCLTGT (ORCPT ); Sat, 12 Mar 2016 14:06:19 -0500 Received: from mail-wm0-f67.google.com ([74.125.82.67]:33660 "EHLO mail-wm0-f67.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751028AbcCLTGH (ORCPT ); Sat, 12 Mar 2016 14:06:07 -0500 Date: Sat, 12 Mar 2016 20:06:01 +0100 From: Ingo Molnar To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, Thomas Gleixner , "H. Peter Anvin" , Borislav Petkov , Peter Zijlstra , Andrew Morton Subject: [GIT PULL] x86 fixes Message-ID: <20160312190601.GA32387@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.23 (2014-03-12) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Linus, Please pull the latest x86-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus # HEAD: 452308de61056a539352a9306c46716d7af8a1f1 x86/efi: Fix boot crash by always mapping boot service regions into new EFI page tables This fixes 3 FPU handling related bugs, an EFI boot crash and a runtime warning. The EFI fix arrived late but I didn't want to delay it to after v4.5 because the effects are pretty bad for the systems that are affected by it. Thanks, Ingo ------------------> Andy Lutomirski (1): x86/fpu: Fix 'no387' regression Borislav Petkov (2): x86/delay: Avoid preemptible context checks in delay_mwaitx() x86/fpu: Fix eager-FPU handling on legacy FPU machines Matt Fleming (1): x86/efi: Fix boot crash by always mapping boot service regions into new EFI page tables Yu-cheng Yu (1): x86/fpu: Revert ("x86/fpu: Disable AVX when eagerfpu is off") arch/x86/include/asm/fpu/xstate.h | 9 ++--- arch/x86/kernel/fpu/core.c | 4 +- arch/x86/kernel/fpu/init.c | 22 +++++------ arch/x86/lib/delay.c | 2 +- arch/x86/platform/efi/quirks.c | 79 ++++++++++++++++++++++++++++++--------- 5 files changed, 79 insertions(+), 37 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index af30fdeb140d..f23cd8c80b1c 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -20,16 +20,15 @@ /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE) - -/* Supported features which require eager state saving */ -#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM) +/* Supported features which require eager state saving */ +#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) + /* All currently supported features */ #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d25097c3fc1d..d5804adfa6da 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -409,8 +409,10 @@ static inline void copy_init_fpstate_to_fpregs(void) { if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, -1); - else + else if (static_cpu_has(X86_FEATURE_FXSR)) copy_kernel_to_fxregs(&init_fpstate.fxsave); + else + copy_kernel_to_fregs(&init_fpstate.fsave); } /* diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6d9f0a7ef4c8..bd08fb77073d 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -78,13 +78,15 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) cr0 &= ~(X86_CR0_TS | X86_CR0_EM); write_cr0(cr0); - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); + if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); + } #ifndef CONFIG_MATH_EMULATION if (!cpu_has_fpu) { @@ -132,7 +134,7 @@ static void __init fpu__init_system_generic(void) * Set up the legacy init FPU context. (xstate init might overwrite this * with a more modern format, if the CPU supports it.) */ - fpstate_init_fxstate(&init_fpstate.fxsave); + fpstate_init(&init_fpstate); fpu__init_system_mxcsr(); } @@ -300,12 +302,6 @@ u64 __init fpu__get_supported_xfeatures_mask(void) static void __init fpu__clear_eager_fpu_features(void) { setup_clear_cpu_cap(X86_FEATURE_MPX); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - setup_clear_cpu_cap(X86_FEATURE_AVX512F); - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); } /* diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index e912b2f6d36e..2f07c291dcc8 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -102,7 +102,7 @@ static void delay_mwaitx(unsigned long __loops) * Use cpu_tss as a cacheline-aligned, seldomly * accessed per-cpu variable as the monitor target. */ - __monitorx(this_cpu_ptr(&cpu_tss), 0, 0); + __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); /* * AMD, like Intel, supports the EAX hint and EAX=0xf diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 2d66db8f80f9..ed30e79347e8 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -131,6 +131,27 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) EXPORT_SYMBOL_GPL(efi_query_variable_store); /* + * Helper function for efi_reserve_boot_services() to figure out if we + * can free regions in efi_free_boot_services(). + * + * Use this function to ensure we do not free regions owned by somebody + * else. We must only reserve (and then free) regions: + * + * - Not within any part of the kernel + * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc) + */ +static bool can_free_region(u64 start, u64 size) +{ + if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) + return false; + + if (!e820_all_mapped(start, start+size, E820_RAM)) + return false; + + return true; +} + +/* * The UEFI specification makes it clear that the operating system is free to do * whatever it wants with boot services code after ExitBootServices() has been * called. Ignoring this recommendation a significant bunch of EFI implementations @@ -147,26 +168,50 @@ void __init efi_reserve_boot_services(void) efi_memory_desc_t *md = p; u64 start = md->phys_addr; u64 size = md->num_pages << EFI_PAGE_SHIFT; + bool already_reserved; if (md->type != EFI_BOOT_SERVICES_CODE && md->type != EFI_BOOT_SERVICES_DATA) continue; - /* Only reserve where possible: - * - Not within any already allocated areas - * - Not over any memory area (really needed, if above?) - * - Not within any part of the kernel - * - Not the bios reserved area - */ - if ((start + size > __pa_symbol(_text) - && start <= __pa_symbol(_end)) || - !e820_all_mapped(start, start+size, E820_RAM) || - memblock_is_region_reserved(start, size)) { - /* Could not reserve, skip it */ - md->num_pages = 0; - memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", - start, start+size-1); - } else + + already_reserved = memblock_is_region_reserved(start, size); + + /* + * Because the following memblock_reserve() is paired + * with free_bootmem_late() for this region in + * efi_free_boot_services(), we must be extremely + * careful not to reserve, and subsequently free, + * critical regions of memory (like the kernel image) or + * those regions that somebody else has already + * reserved. + * + * A good example of a critical region that must not be + * freed is page zero (first 4Kb of memory), which may + * contain boot services code/data but is marked + * E820_RESERVED by trim_bios_range(). + */ + if (!already_reserved) { memblock_reserve(start, size); + + /* + * If we are the first to reserve the region, no + * one else cares about it. We own it and can + * free it later. + */ + if (can_free_region(start, size)) + continue; + } + + /* + * We don't own the region. We must not free it. + * + * Setting this bit for a boot services region really + * doesn't make sense as far as the firmware is + * concerned, but it does provide us with a way to tag + * those regions that must not be paired with + * free_bootmem_late(). + */ + md->attribute |= EFI_MEMORY_RUNTIME; } } @@ -183,8 +228,8 @@ void __init efi_free_boot_services(void) md->type != EFI_BOOT_SERVICES_DATA) continue; - /* Could not reserve boot area */ - if (!size) + /* Do not free, someone else owns it: */ + if (md->attribute & EFI_MEMORY_RUNTIME) continue; free_bootmem_late(start, size);