LKML Archive on lore.kernel.org
* [GIT PULL] x86 fixes
@ 2016-04-23 11:38 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-04-23 11:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ea5dfb5fae81939f777ca569d8cfb599252da2e8 x86 EDAC, sb_edac.c: Take account of channel hashing when needed

Misc fixes: two EDAC driver fixes, a Xen crash fix, a Hyper-V log spam fix, and a
documentation fix.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/edac/sb_edac.c             # ea5dfb5fae81: x86 EDAC, sb_edac.c: Take ac
                                   # ff15e95c8276: x86 EDAC, sb_edac.c: Repair 

 Thanks,

	Ingo

------------------>
Jan Beulich (1):
      x86/mm/xen: Suppress hugetlbfs in PV guests

Juergen Gross (1):
      x86/doc: Correct limits in Documentation/x86/x86_64/mm.txt

Tony Luck (2):
      x86 EDAC, sb_edac.c: Repair damage introduced when "fixing" channel address
      x86 EDAC, sb_edac.c: Take account of channel hashing when needed

Vitaly Kuznetsov (1):
      x86/hyperv: Avoid reporting bogus NMI status for Gen2 instances


 Documentation/x86/x86_64/mm.txt |  6 +++---
 arch/x86/include/asm/hugetlb.h  |  1 +
 arch/x86/kernel/cpu/mshyperv.c  | 12 ++++++++++++
 drivers/edac/sb_edac.c          | 30 ++++++++++++++++++++++++++----
 4 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index c518dce7da4d..5aa738346062 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -19,7 +19,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ffffffef00000000 - ffffffff00000000 (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space
+ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
 ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
@@ -31,8 +31,8 @@ vmalloc space is lazily synchronized into the different PML4 pages of
 the processes using the page fault handler, with init_level4_pgt as
 reference.
 
-Current X86-64 implementations only support 40 bits of address space,
-but we support up to 46 bits. This expands into MBZ space in the page tables.
+Current X86-64 implementations support up to 46 bits of address space (64 TB),
+which is our current limit. This expands into MBZ space in the page tables.
 
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
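
For reference, the corrected module-space figure follows directly from the
boundaries documented above: the module area runs from ffffffffa0000000 up
to the vsyscall page at ffffffffff600000, so

   0xffffffffff600000 - 0xffffffffa0000000 = 0x5f600000 bytes
   0x5f600000 / 0x100000 (1 MB)            = 1526 MB, exactly
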
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index f8a29d2c97b0..e6a8613fbfb0 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -4,6 +4,7 @@
 #include <asm/page.h>
 #include <asm-generic/hugetlb.h>
 
+#define hugepages_supported() cpu_has_pse
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4e7c6933691c..10c11b4da31d 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -152,6 +152,11 @@ static struct clocksource hyperv_cs = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static unsigned char hv_get_nmi_reason(void)
+{
+	return 0;
+}
+
 static void __init ms_hyperv_init_platform(void)
 {
 	/*
@@ -191,6 +196,13 @@ static void __init ms_hyperv_init_platform(void)
 	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
 #endif
 	mark_tsc_unstable("running on Hyper-V");
+
+	/*
+	 * Generation 2 instances don't support reading the NMI status from
+	 * 0x61 port.
+	 */
+	if (efi_enabled(EFI_BOOT))
+		x86_platform.get_nmi_reason = hv_get_nmi_reason;
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
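
Background on the Gen2 check above: Generation 2 Hyper-V instances are
UEFI-only and have no emulated PC/AT I/O ports, while the default x86
handler reads the legacy NMI status port 0x61 mentioned in the comment. A
minimal sketch of what is being overridden (treat the exact shape of the
default handler as an assumption):

   /* Roughly what x86_platform.get_nmi_reason does by default: */
   static unsigned char default_get_nmi_reason(void)
   {
           return inb(0x61);       /* legacy NMI status/control port */
   }
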
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 93f0d4120289..468447aff8eb 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -362,6 +362,7 @@ struct sbridge_pvt {
 
 	/* Memory type detection */
 	bool			is_mirrored, is_lockstep, is_close_pg;
+	bool			is_chan_hash;
 
 	/* Fifo double buffers */
 	struct mce		mce_entry[MCE_LOG_LEN];
@@ -1060,6 +1061,20 @@ static inline u8 sad_pkg_ha(u8 pkg)
 	return (pkg >> 2) & 0x1;
 }
 
+static int haswell_chan_hash(int idx, u64 addr)
+{
+	int i;
+
+	/*
+	 * XOR even bits from 12:26 to bit0 of idx,
+	 *     odd bits from 13:27 to bit1
+	 */
+	for (i = 12; i < 28; i += 2)
+		idx ^= (addr >> i) & 3;
+
+	return idx;
+}
+
 /****************************************************************************
 			Memory check routines
  ****************************************************************************/
@@ -1616,6 +1631,10 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 		KNL_MAX_CHANNELS : NUM_CHANNELS;
 	u64 knl_mc_sizes[KNL_MAX_CHANNELS];
 
+	if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
+		pci_read_config_dword(pvt->pci_ha0, HASWELL_HASYSDEFEATURE2, &reg);
+		pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
+	}
 	if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL ||
 			pvt->info.type == KNIGHTS_LANDING)
 		pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, &reg);
@@ -2118,12 +2137,15 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	}
 
 	ch_way = TAD_CH(reg) + 1;
-	sck_way = 1 << TAD_SOCK(reg);
+	sck_way = TAD_SOCK(reg);
 
 	if (ch_way == 3)
 		idx = addr >> 6;
-	else
+	else {
 		idx = (addr >> (6 + sck_way + shiftup)) & 0x3;
+		if (pvt->is_chan_hash)
+			idx = haswell_chan_hash(idx, addr);
+	}
 	idx = idx % ch_way;
 
 	/*
@@ -2157,7 +2179,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		switch(ch_way) {
 		case 2:
 		case 4:
-			sck_xch = 1 << sck_way * (ch_way >> 1);
+			sck_xch = (1 << sck_way) * (ch_way >> 1);
 			break;
 		default:
 			sprintf(msg, "Invalid mirror set. Can't decode addr");
@@ -2193,7 +2215,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 
 	ch_addr = addr - offset;
 	ch_addr >>= (6 + shiftup);
-	ch_addr /= ch_way * sck_way;
+	ch_addr /= sck_xch;
 	ch_addr <<= (6 + shiftup);
 	ch_addr |= addr & ((1 << (6 + shiftup)) - 1);
 

* [GIT PULL] x86 fixes
@ 2019-06-29  9:14 Ingo Molnar
  2019-06-29 11:45 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-06-29  9:14 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ae6a45a0868986f69039a2150d3b2b9ca294c378 x86/unwind/orc: Fall back to using frame pointers for generated code

Misc fixes all over the place:

- might_sleep() atomicity fix in the microcode loader
- resctrl boundary condition fix
- APIC arithmetic bug fix for frequencies >= 4.2 GHz
- three 5-level paging crash fixes
- two speculation fixes
- a perf/stacktrace fix

 Thanks,

	Ingo

------------------>
Alejandro Jimenez (1):
      x86/speculation: Allow guests to use SSBD even if host does not

Colin Ian King (1):
      x86/apic: Fix integer overflow on 10 bit left shift of cpu_khz

Josh Poimboeuf (1):
      x86/unwind/orc: Fall back to using frame pointers for generated code

Kirill A. Shutemov (3):
      x86/boot/64: Fix crash if kernel image crosses page table boundary
      x86/boot/64: Add missing fixup_pointer() for next_early_pgt access
      x86/mm: Handle physical-virtual alignment mismatch in phys_p4d_init()

Reinette Chatre (1):
      x86/resctrl: Prevent possible overrun during bitmap operations

Song Liu (1):
      perf/x86: Always store regs->ip in perf_callchain_kernel()

Thomas Gleixner (1):
      x86/microcode: Fix the microcode load on CPU hotplug for real


 arch/x86/events/core.c                 | 10 +++++-----
 arch/x86/kernel/apic/apic.c            |  3 ++-
 arch/x86/kernel/cpu/bugs.c             | 11 ++++++++++-
 arch/x86/kernel/cpu/microcode/core.c   | 15 ++++++++++-----
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++++++++++++++++------------------
 arch/x86/kernel/head64.c               | 20 ++++++++++---------
 arch/x86/kernel/unwind_orc.c           | 26 +++++++++++++++++++++----
 arch/x86/mm/init_64.c                  | 24 ++++++++++++-----------
 8 files changed, 89 insertions(+), 55 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f315425d8468..4fb3ca1e699d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 		return;
 	}
 
-	if (perf_hw_regs(regs)) {
-		if (perf_callchain_store(entry, regs->ip))
-			return;
+	if (perf_callchain_store(entry, regs->ip))
+		return;
+
+	if (perf_hw_regs(regs))
 		unwind_start(&state, current, regs, NULL);
-	} else {
+	else
 		unwind_start(&state, current, NULL, (void *)regs->sp);
-	}
 
 	for (; !unwind_done(&state); unwind_next_frame(&state)) {
 		addr = unwind_get_return_address(&state);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 177aa8ef2afa..85be316665b4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1464,7 +1464,8 @@ static void apic_pending_intr_clear(void)
 		if (queued) {
 			if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
 				ntsc = rdtsc();
-				max_loops = (cpu_khz << 10) - (ntsc - tsc);
+				max_loops = (long long)cpu_khz << 10;
+				max_loops -= ntsc - tsc;
 			} else {
 				max_loops--;
 			}
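
The overflow fixed above, worked through with a hypothetical 4.3 GHz CPU
(cpu_khz is a 32-bit quantity, so the shift wraps once cpu_khz reaches
2^32 / 1024 = 4194304 kHz, i.e. ~4.2 GHz):

   /* Sketch, not kernel code: 4300000 << 10 = 4403200000, which is
    * larger than UINT_MAX (4294967295), so the 32-bit shift wraps to
    * 108232704.  Promoting before the shift keeps the full value. */
   unsigned int khz  = 4300000;                 /* 4.3 GHz in kHz */
   long long    bad  = khz << 10;               /* wraps: 108232704 */
   long long    good = (long long)khz << 10;    /* 4403200000 */
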
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03b4cc0ec3a7..66ca906aa790 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -835,6 +835,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 		break;
 	}
 
+	/*
+	 * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+	 * bit in the mask to allow guests to use the mitigation even in the
+	 * case where the host does not enable it.
+	 */
+	if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+	    static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+		x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+	}
+
 	/*
 	 * We have three CPU feature flags that are in play here:
 	 *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 			x86_amd_ssb_disable();
 		} else {
 			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
-			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
 			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 		}
 	}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index a813987b5552..cb0fdcaf1415 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = {
 	.resume			= mc_bp_resume,
 };
 
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
 {
-	struct device *dev;
-
-	dev = get_cpu_device(cpu);
 	microcode_update_cpu(cpu);
 	pr_debug("CPU%d added\n", cpu);
+	return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
 
 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
 		pr_err("Failed to create group for CPU%d\n", cpu);
@@ -872,7 +875,9 @@ int __init microcode_init(void)
 		goto out_ucode_group;
 
 	register_syscore_ops(&mc_syscore_ops);
-	cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+	cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+				  mc_cpu_starting, NULL);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
 				  mc_cpu_online, mc_cpu_down_prep);
 
 	pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 869cbef5da81..f9d8ed6ab03b 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
 			      struct seq_file *seq, void *v)
 {
 	struct rdt_resource *r = of->kn->parent->priv;
-	u32 sw_shareable = 0, hw_shareable = 0;
-	u32 exclusive = 0, pseudo_locked = 0;
+	/*
+	 * Use unsigned long even though only 32 bits are used to ensure
+	 * test_bit() is used safely.
+	 */
+	unsigned long sw_shareable = 0, hw_shareable = 0;
+	unsigned long exclusive = 0, pseudo_locked = 0;
 	struct rdt_domain *dom;
 	int i, hwb, swb, excl, psl;
 	enum rdtgrp_mode mode;
@@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
 		}
 		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
 			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
-			hwb = test_bit(i, (unsigned long *)&hw_shareable);
-			swb = test_bit(i, (unsigned long *)&sw_shareable);
-			excl = test_bit(i, (unsigned long *)&exclusive);
-			psl = test_bit(i, (unsigned long *)&pseudo_locked);
+			hwb = test_bit(i, &hw_shareable);
+			swb = test_bit(i, &sw_shareable);
+			excl = test_bit(i, &exclusive);
+			psl = test_bit(i, &pseudo_locked);
 			if (hwb && swb)
 				seq_putc(seq, 'X');
 			else if (hwb && !swb)
@@ -2494,26 +2498,19 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
  */
 static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
 {
-	/*
-	 * Convert the u32 _val to an unsigned long required by all the bit
-	 * operations within this function. No more than 32 bits of this
-	 * converted value can be accessed because all bit operations are
-	 * additionally provided with cbm_len that is initialized during
-	 * hardware enumeration using five bits from the EAX register and
-	 * thus never can exceed 32 bits.
-	 */
-	unsigned long *val = (unsigned long *)_val;
+	unsigned long val = *_val;
 	unsigned int cbm_len = r->cache.cbm_len;
 	unsigned long first_bit, zero_bit;
 
-	if (*val == 0)
+	if (val == 0)
 		return;
 
-	first_bit = find_first_bit(val, cbm_len);
-	zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+	first_bit = find_first_bit(&val, cbm_len);
+	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
 
 	/* Clear any remaining bits to ensure contiguous region */
-	bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+	*_val = (u32)val;
 }
 
 /*
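
The overrun fixed above, reduced to a stand-alone sketch (names are
illustrative):

   #include <stdint.h>

   static uint32_t cbm = 0xf;

   static unsigned long buggy_read(void)
   {
           /* BAD: on LP64 this dereference reads 8 bytes, 4 of them
            * beyond 'cbm' -- out of bounds, and on big-endian machines
            * the wrong half entirely.  test_bit()/find_first_bit() do
            * exactly this when handed a cast u32 pointer: */
           return *(unsigned long *)&cbm;
   }

   static uint32_t fixed_pattern(void)
   {
           /* GOOD (the pattern the patch adopts): widen into a local,
            * run the bit operations on &val, then narrow back: */
           unsigned long val = cbm;
           return (uint32_t)val;
   }
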
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 16b1cbd3a61e..29ffa495bd1c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
 
 	if (la57) {
-		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+		p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
+				    physaddr);
 
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
 		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
 		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
 
-		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
-		p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
-		p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
+		i = physaddr >> P4D_SHIFT;
+		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
+		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
 	} else {
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
 		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
 		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
 	}
 
-	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
-	pud[i + 0] = (pudval_t)pmd + pgtable_flags;
-	pud[i + 1] = (pudval_t)pmd + pgtable_flags;
+	i = physaddr >> PUD_SHIFT;
+	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
 
 	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
 	/* Filter out unsupported __PAGE_KERNEL_* bits: */
@@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pmd_entry +=  physaddr;
 
 	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
-		int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
-		pmd[idx] = pmd_entry + i * PMD_SIZE;
+		int idx = i + (physaddr >> PMD_SHIFT);
+
+		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
 	}
 
 	/*
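
The boundary bug fixed above, with hypothetical numbers: PTRS_PER_PUD is
512, and the startup code always writes two adjacent entries to cover an
image that straddles an entry boundary. Computing the modulo once was the
problem:

   /* Old: i = (physaddr >> PUD_SHIFT) % 512; write pud[i] and pud[i+1].
    * If i == 511, pud[512] is written -- one slot past the page table.
    * New: apply the modulo per entry, so the second write wraps:
    *
    *   i = physaddr >> PUD_SHIFT;        // suppose i % 512 == 511
    *   pud[(i + 0) % 512] = ...;         // pud[511]
    *   pud[(i + 1) % 512] = ...;         // pud[0], not pud[512]
    */
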
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 33b66b5c5aec..72b997eaa1fc 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip);
  * But they are copies of the ftrace entries that are static and
  * defined in ftrace_*.S, which do have orc entries.
  *
- * If the undwinder comes across a ftrace trampoline, then find the
+ * If the unwinder comes across a ftrace trampoline, then find the
  * ftrace function that was used to create it, and use that ftrace
- * function's orc entrie, as the placement of the return code in
+ * function's orc entry, as the placement of the return code in
  * the stack will be identical.
  */
 static struct orc_entry *orc_ftrace_find(unsigned long ip)
@@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = {
 	.type = ORC_TYPE_CALL
 };
 
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+	.type		= ORC_TYPE_CALL,
+	.sp_reg		= ORC_REG_BP,
+	.sp_offset	= 16,
+	.bp_reg		= ORC_REG_PREV_SP,
+	.bp_offset	= -16,
+	.end		= 0,
+};
+
 static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
@@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	 * calls and calls to noreturn functions.
 	 */
 	orc = orc_find(state->signal ? state->ip : state->ip - 1);
-	if (!orc)
-		goto err;
+	if (!orc) {
+		/*
+		 * As a fallback, try to assume this code uses a frame pointer.
+		 * This is useful for generated code, like BPF, which ORC
+		 * doesn't know about.  This is just a guess, so the rest of
+		 * the unwind is no longer considered reliable.
+		 */
+		orc = &orc_fp_entry;
+		state->error = true;
+	}
 
 	/* End-of-stack check for kernel threads: */
 	if (orc->sp_reg == ORC_REG_UNDEFINED) {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 693aaf28d5fe..0f01c7b1d217 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -671,23 +671,25 @@ static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 	      unsigned long page_size_mask, bool init)
 {
-	unsigned long paddr_next, paddr_last = paddr_end;
-	unsigned long vaddr = (unsigned long)__va(paddr);
-	int i = p4d_index(vaddr);
+	unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+
+	paddr_last = paddr_end;
+	vaddr = (unsigned long)__va(paddr);
+	vaddr_end = (unsigned long)__va(paddr_end);
 
 	if (!pgtable_l5_enabled())
 		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
 				     page_size_mask, init);
 
-	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
-		p4d_t *p4d;
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		p4d_t *p4d = p4d_page + p4d_index(vaddr);
 		pud_t *pud;
 
-		vaddr = (unsigned long)__va(paddr);
-		p4d = p4d_page + p4d_index(vaddr);
-		paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+		vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
+		paddr = __pa(vaddr);
 
 		if (paddr >= paddr_end) {
+			paddr_next = __pa(vaddr_next);
 			if (!after_bootmem &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					     E820_TYPE_RAM) &&
@@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 
 		if (!p4d_none(*p4d)) {
 			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr, paddr_end,
-						   page_size_mask, init);
+			paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
+					page_size_mask, init);
 			continue;
 		}
 
 		pud = alloc_low_page();
-		paddr_last = phys_pud_init(pud, paddr, paddr_end,
+		paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
 					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
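
The third 5-level paging fix above addresses a subtler mismatch: the
kernel's virtual-to-physical offset is not guaranteed to be P4D-aligned,
so iterating by physical address and re-deriving the virtual address (as
the old loop did) can compute a p4d_index() that drifts out of step. The
corrected loop, reduced to its skeleton:

   /* Illustrative skeleton only: walk the virtual range, and derive
    * the physical address from it -- never the other way around. */
   for (; vaddr < vaddr_end; vaddr = vaddr_next) {
           vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
           paddr = __pa(vaddr);
           /* ... populate p4d_page + p4d_index(vaddr) ... */
   }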

* [GIT PULL] x86 fixes
@ 2019-06-02 17:44 Ingo Molnar
  2019-06-02 18:15 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-06-02 17:44 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 2ac44ab608705948564791ce1d15d43ba81a1e38 x86/CPU/AMD: Don't force the CPB cap when running under a hypervisor

Two fixes: a quirk for KVM guests running on certain AMD CPUs, and a 
KASAN-related build fix.

 Thanks,

	Ingo

------------------>
Ard Biesheuvel (1):
      x86/boot: Provide KASAN compatible aliases for string routines

Frank van der Linden (1):
      x86/CPU/AMD: Don't force the CPB cap when running under a hypervisor


 arch/x86/boot/compressed/string.c | 14 ++++++++++----
 arch/x86/kernel/cpu/amd.c         |  7 +++++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 19dbbcdd1a53..81fc1eaa3229 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -11,7 +11,7 @@
 #include "../string.c"
 
 #ifdef CONFIG_X86_32
-static void *__memcpy(void *dest, const void *src, size_t n)
+static void *____memcpy(void *dest, const void *src, size_t n)
 {
 	int d0, d1, d2;
 	asm volatile(
@@ -25,7 +25,7 @@ static void *__memcpy(void *dest, const void *src, size_t n)
 	return dest;
 }
 #else
-static void *__memcpy(void *dest, const void *src, size_t n)
+static void *____memcpy(void *dest, const void *src, size_t n)
 {
 	long d0, d1, d2;
 	asm volatile(
@@ -56,7 +56,7 @@ void *memmove(void *dest, const void *src, size_t n)
 	const unsigned char *s = src;
 
 	if (d <= s || d - s >= n)
-		return __memcpy(dest, src, n);
+		return ____memcpy(dest, src, n);
 
 	while (n-- > 0)
 		d[n] = s[n];
@@ -71,5 +71,11 @@ void *memcpy(void *dest, const void *src, size_t n)
 		warn("Avoiding potentially unsafe overlapping memcpy()!");
 		return memmove(dest, src, n);
 	}
-	return __memcpy(dest, src, n);
+	return ____memcpy(dest, src, n);
 }
+
+#ifdef CONFIG_KASAN
+extern void *__memset(void *s, int c, size_t n) __alias(memset);
+extern void *__memmove(void *dest, const void *src, size_t n) __alias(memmove);
+extern void *__memcpy(void *dest, const void *src, size_t n) __alias(memcpy);
+#endif
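
On the aliases above: KASAN-enabled builds emit references to the
double-underscore names of the string routines, and the decompressor
previously provided no such symbols, breaking the link. A minimal sketch
of the alias mechanics (function names here are hypothetical):

   #include <stddef.h>

   static void *my_copy(void *dest, const void *src, size_t n)
   {
           char *d = dest;
           const char *s = src;

           while (n--)
                   *d++ = *s++;
           return dest;
   }

   /* '__my_copy' becomes a second name for the same code: */
   extern void *__my_copy(void *, const void *, size_t)
           __attribute__((alias("my_copy")));
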
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 80a405c2048a..8d4e50428b68 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -824,8 +824,11 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
 {
 	set_cpu_cap(c, X86_FEATURE_ZEN);
 
-	/* Fix erratum 1076: CPB feature bit not being set in CPUID. */
-	if (!cpu_has(c, X86_FEATURE_CPB))
+	/*
+	 * Fix erratum 1076: CPB feature bit not being set in CPUID.
+	 * Always set it, except when running under a hypervisor.
+	 */
+	if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
 		set_cpu_cap(c, X86_FEATURE_CPB);
 }
 

* [GIT PULL] x86 fixes
@ 2019-05-16 16:26 Ingo Molnar
  2019-05-16 18:20 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-05-16 16:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 9d8d0294e78a164d407133dea05caf4b84247d6a x86/speculation/mds: Improve CPU buffer clear documentation

Misc fixes and updates:

 - a handful of MDS documentation/comment updates
 - a cleanup related to hweight interfaces
 - a SEV guest fix for large pages
 - a kprobes LTO fix
 - and a final cleanup commit for vDSO HPET support removal.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  lib/hweight.c                      # 409ca45526a4: x86/kconfig: Disable CONFIG_

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      x86/kprobes: Make trampoline_handler() global and visible

Andy Lutomirski (2):
      x86/speculation/mds: Revert CPU buffer clear on double fault exit
      x86/speculation/mds: Improve CPU buffer clear documentation

Brijesh Singh (1):
      x86/mm: Do not use set_{pud, pmd}_safe() when splitting a large page

Jia Zhang (1):
      x86/vdso: Remove hpet_page from vDSO

Masahiro Yamada (1):
      x86/kconfig: Disable CONFIG_GENERIC_HWEIGHT and remove __HAVE_ARCH_SW_HWEIGHT


 Documentation/x86/mds.rst           |  44 ++---------
 arch/x86/Kconfig                    |   3 -
 arch/x86/entry/vdso/vdso2c.c        |   3 -
 arch/x86/include/asm/arch_hweight.h |   2 -
 arch/x86/include/asm/vdso.h         |   1 -
 arch/x86/kernel/kprobes/core.c      |   2 +-
 arch/x86/kernel/traps.c             |   8 --
 arch/x86/mm/init_64.c               | 144 ++++++++++++++++++++++++++----------
 arch/x86/mm/mem_encrypt.c           |  10 ++-
 arch/x86/mm/mm_internal.h           |   3 +
 lib/hweight.c                       |   4 -
 11 files changed, 121 insertions(+), 103 deletions(-)

diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
index 534e9baa4e1d..5d4330be200f 100644
--- a/Documentation/x86/mds.rst
+++ b/Documentation/x86/mds.rst
@@ -142,45 +142,13 @@ Mitigation points
    mds_user_clear.
 
    The mitigation is invoked in prepare_exit_to_usermode() which covers
-   most of the kernel to user space transitions. There are a few exceptions
-   which are not invoking prepare_exit_to_usermode() on return to user
-   space. These exceptions use the paranoid exit code.
+   all but one of the kernel to user space transitions.  The exception
+   is when we return from a Non Maskable Interrupt (NMI), which is
+   handled directly in do_nmi().
 
-   - Non Maskable Interrupt (NMI):
-
-     Access to sensible data like keys, credentials in the NMI context is
-     mostly theoretical: The CPU can do prefetching or execute a
-     misspeculated code path and thereby fetching data which might end up
-     leaking through a buffer.
-
-     But for mounting other attacks the kernel stack address of the task is
-     already valuable information. So in full mitigation mode, the NMI is
-     mitigated on the return from do_nmi() to provide almost complete
-     coverage.
-
-   - Double fault (#DF):
-
-     A double fault is usually fatal, but the ESPFIX workaround, which can
-     be triggered from user space through modify_ldt(2) is a recoverable
-     double fault. #DF uses the paranoid exit path, so explicit mitigation
-     in the double fault handler is required.
-
-   - Machine Check Exception (#MC):
-
-     Another corner case is a #MC which hits between the CPU buffer clear
-     invocation and the actual return to user. As this still is in kernel
-     space it takes the paranoid exit path which does not clear the CPU
-     buffers. So the #MC handler repopulates the buffers to some
-     extent. Machine checks are not reliably controllable and the window is
-     extremly small so mitigation would just tick a checkbox that this
-     theoretical corner case is covered. To keep the amount of special
-     cases small, ignore #MC.
-
-   - Debug Exception (#DB):
-
-     This takes the paranoid exit path only when the INT1 breakpoint is in
-     kernel space. #DB on a user space address takes the regular exit path,
-     so no extra mitigation required.
+   (The reason that NMI is special is that prepare_exit_to_usermode() can
+    enable IRQs.  In NMI context, NMIs are blocked, and we don't want to
+    enable IRQs with NMIs blocked.)
 
 
 2. C-State transition
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 326b2d5bab9d..6bc9dd6e7534 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -259,9 +259,6 @@ config GENERIC_BUG
 config GENERIC_BUG_RELATIVE_POINTERS
 	bool
 
-config GENERIC_HWEIGHT
-	def_bool y
-
 config ARCH_MAY_HAVE_PC_FDC
 	def_bool y
 	depends on ISA_DMA_API
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 8e470b018512..3a4d8d4d39f8 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -73,14 +73,12 @@ const char *outfilename;
 enum {
 	sym_vvar_start,
 	sym_vvar_page,
-	sym_hpet_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
 };
 
 const int special_pages[] = {
 	sym_vvar_page,
-	sym_hpet_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
 };
@@ -93,7 +91,6 @@ struct vdso_sym {
 struct vdso_sym required_syms[] = {
 	[sym_vvar_start] = {"vvar_start", true},
 	[sym_vvar_page] = {"vvar_page", true},
-	[sym_hpet_page] = {"hpet_page", true},
 	[sym_pvclock_page] = {"pvclock_page", true},
 	[sym_hvclock_page] = {"hvclock_page", true},
 	{"VDSO32_NOTE_MASK", true},
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index fc0693569f7a..ba88edd0d58b 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -12,8 +12,6 @@
 #define REG_OUT "a"
 #endif
 
-#define __HAVE_ARCH_SW_HWEIGHT
-
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res;
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 27566e57e87d..230474e2ddb5 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -19,7 +19,6 @@ struct vdso_image {
 	long sym_vvar_start;  /* Negative offset to the vvar area */
 
 	long sym_vvar_page;
-	long sym_hpet_page;
 	long sym_pvclock_page;
 	long sym_hvclock_page;
 	long sym_VDSO32_NOTE_MASK;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index cf52ee0d8711..9e4fa2484d10 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -768,7 +768,7 @@ static struct kprobe kretprobe_kprobe = {
 /*
  * Called from kretprobe_trampoline
  */
-static __used void *trampoline_handler(struct pt_regs *regs)
+__used __visible void *trampoline_handler(struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb;
 	struct kretprobe_instance *ri = NULL;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7de466eb960b..8b6d03e55d2f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -58,7 +58,6 @@
 #include <asm/alternative.h>
 #include <asm/fpu/xstate.h>
 #include <asm/trace/mpx.h>
-#include <asm/nospec-branch.h>
 #include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
@@ -368,13 +367,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 		regs->ip = (unsigned long)general_protection;
 		regs->sp = (unsigned long)&gpregs->orig_ax;
 
-		/*
-		 * This situation can be triggered by userspace via
-		 * modify_ldt(2) and the return does not take the regular
-		 * user space exit, so a CPU buffer clear is required when
-		 * MDS mitigation is enabled.
-		 */
-		mds_user_clear_cpu_buffers();
 		return;
 	}
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 20d14254b686..62fc457f3849 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -58,6 +58,37 @@
 
 #include "ident_map.c"
 
+#define DEFINE_POPULATE(fname, type1, type2, init)		\
+static inline void fname##_init(struct mm_struct *mm,		\
+		type1##_t *arg1, type2##_t *arg2, bool init)	\
+{								\
+	if (init)						\
+		fname##_safe(mm, arg1, arg2);			\
+	else							\
+		fname(mm, arg1, arg2);				\
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, init)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
+DEFINE_POPULATE(pud_populate, pud, pmd, init)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+#define DEFINE_ENTRY(type1, type2, init)			\
+static inline void set_##type1##_init(type1##_t *arg1,		\
+			type2##_t arg2, bool init)		\
+{								\
+	if (init)						\
+		set_##type1##_safe(arg1, arg2);			\
+	else							\
+		set_##type1(arg1, arg2);			\
+}
+
+DEFINE_ENTRY(p4d, p4d, init)
+DEFINE_ENTRY(pud, pud, init)
+DEFINE_ENTRY(pmd, pmd, init)
+DEFINE_ENTRY(pte, pte, init)
+
+
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
@@ -414,7 +445,7 @@ void __init cleanup_highmap(void)
  */
 static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
-	      pgprot_t prot)
+	      pgprot_t prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -432,7 +463,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pte_safe(pte, __pte(0));
+				set_pte_init(pte, __pte(0), init);
 			continue;
 		}
 
@@ -452,7 +483,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
 			pr_info("   pte=%p addr=%lx pte=%016lx\n", pte, paddr,
 				pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 		pages++;
-		set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+		set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
 		paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
 	}
 
@@ -468,7 +499,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask, pgprot_t prot)
+	      unsigned long page_size_mask, pgprot_t prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -487,7 +518,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PMD_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pmd_safe(pmd, __pmd(0));
+				set_pmd_init(pmd, __pmd(0), init);
 			continue;
 		}
 
@@ -496,7 +527,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 				spin_lock(&init_mm.page_table_lock);
 				pte = (pte_t *)pmd_page_vaddr(*pmd);
 				paddr_last = phys_pte_init(pte, paddr,
-							   paddr_end, prot);
+							   paddr_end, prot,
+							   init);
 				spin_unlock(&init_mm.page_table_lock);
 				continue;
 			}
@@ -524,19 +556,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte_safe((pte_t *)pmd,
-				pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
-					__pgprot(pgprot_val(prot) | _PAGE_PSE)));
+			set_pte_init((pte_t *)pmd,
+				     pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
+					     __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
 			continue;
 		}
 
 		pte = alloc_low_page();
-		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pmd_populate_kernel_safe(&init_mm, pmd, pte);
+		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	update_page_count(PG_LEVEL_2M, pages);
@@ -551,7 +584,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -573,7 +606,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PUD_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pud_safe(pud, __pud(0));
+				set_pud_init(pud, __pud(0), init);
 			continue;
 		}
 
@@ -583,7 +616,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 				paddr_last = phys_pmd_init(pmd, paddr,
 							   paddr_end,
 							   page_size_mask,
-							   prot);
+							   prot, init);
 				continue;
 			}
 			/*
@@ -610,9 +643,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte_safe((pte_t *)pud,
-				pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
-					PAGE_KERNEL_LARGE));
+			set_pte_init((pte_t *)pud,
+				     pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+					     PAGE_KERNEL_LARGE),
+				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
 			continue;
@@ -620,10 +654,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 
 		pmd = alloc_low_page();
 		paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
-					   page_size_mask, prot);
+					   page_size_mask, prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pud_populate_safe(&init_mm, pud, pmd);
+		pud_populate_init(&init_mm, pud, pmd, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
@@ -634,14 +668,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 
 static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, bool init)
 {
 	unsigned long paddr_next, paddr_last = paddr_end;
 	unsigned long vaddr = (unsigned long)__va(paddr);
 	int i = p4d_index(vaddr);
 
 	if (!pgtable_l5_enabled())
-		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+				     page_size_mask, init);
 
 	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
 		p4d_t *p4d;
@@ -657,39 +692,34 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_p4d_safe(p4d, __p4d(0));
+				set_p4d_init(p4d, __p4d(0), init);
 			continue;
 		}
 
 		if (!p4d_none(*p4d)) {
 			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr,
-					paddr_end,
-					page_size_mask);
+			paddr_last = phys_pud_init(pud, paddr, paddr_end,
+						   page_size_mask, init);
 			continue;
 		}
 
 		pud = alloc_low_page();
 		paddr_last = phys_pud_init(pud, paddr, paddr_end,
-					   page_size_mask);
+					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		p4d_populate_safe(&init_mm, p4d, pud);
+		p4d_populate_init(&init_mm, p4d, pud, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
 	return paddr_last;
 }
 
-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
-			     unsigned long paddr_end,
-			     unsigned long page_size_mask)
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
+			       unsigned long paddr_end,
+			       unsigned long page_size_mask,
+			       bool init)
 {
 	bool pgd_changed = false;
 	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -709,19 +739,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 			p4d = (p4d_t *)pgd_page_vaddr(*pgd);
 			paddr_last = phys_p4d_init(p4d, __pa(vaddr),
 						   __pa(vaddr_end),
-						   page_size_mask);
+						   page_size_mask,
+						   init);
 			continue;
 		}
 
 		p4d = alloc_low_page();
 		paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
-					   page_size_mask);
+					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
 		if (pgtable_l5_enabled())
-			pgd_populate_safe(&init_mm, pgd, p4d);
+			pgd_populate_init(&init_mm, pgd, p4d, init);
 		else
-			p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
+			p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
+					  (pud_t *) p4d, init);
+
 		spin_unlock(&init_mm.page_table_lock);
 		pgd_changed = true;
 	}
@@ -732,6 +765,37 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 	return paddr_last;
 }
 
+
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. Note that it can only be used to populate non-present entries.
+ * The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+			     unsigned long paddr_end,
+			     unsigned long page_size_mask)
+{
+	return __kernel_physical_mapping_init(paddr_start, paddr_end,
+					      page_size_mask, true);
+}
+
+/*
+ * This function is similar to kernel_physical_mapping_init() above with the
+ * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
+ * when updating the mapping. The caller is responsible to flush the TLBs after
+ * the function returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+			       unsigned long paddr_end,
+			       unsigned long page_size_mask)
+{
+	return __kernel_physical_mapping_init(paddr_start, paddr_end,
+					      page_size_mask, false);
+}
+
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
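
For illustration, the first DEFINE_POPULATE() invocation earlier in this
patch expands to roughly the following (the _safe variants BUG on
overwriting a present entry, which is wanted for first-time init but not
when kernel_physical_mapping_change() splits an existing large page):

   static inline void p4d_populate_init(struct mm_struct *mm,
                   p4d_t *arg1, pud_t *arg2, bool init)
   {
           if (init)
                   p4d_populate_safe(mm, arg1, arg2);
           else
                   p4d_populate(mm, arg1, arg2);
   }
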
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 385afa2b9e17..51f50a7a07ef 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -301,9 +301,13 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 		else
 			split_page_size_mask = 1 << PG_LEVEL_2M;
 
-		kernel_physical_mapping_init(__pa(vaddr & pmask),
-					     __pa((vaddr_end & pmask) + psize),
-					     split_page_size_mask);
+		/*
+		 * kernel_physical_mapping_change() does not flush the TLBs, so
+		 * a TLB flush is required after we exit from the for loop.
+		 */
+		kernel_physical_mapping_change(__pa(vaddr & pmask),
+					       __pa((vaddr_end & pmask) + psize),
+					       split_page_size_mask);
 	}
 
 	ret = 0;
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 319bde386d5f..eeae142062ed 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
 unsigned long kernel_physical_mapping_init(unsigned long start,
 					     unsigned long end,
 					     unsigned long page_size_mask);
+unsigned long kernel_physical_mapping_change(unsigned long start,
+					     unsigned long end,
+					     unsigned long page_size_mask);
 void zone_sizes_init(void);
 
 extern int after_bootmem;
diff --git a/lib/hweight.c b/lib/hweight.c
index 7660d88fd496..c94586b62551 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -10,7 +10,6 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
-#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned int __sw_hweight32(unsigned int w)
 {
 #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -27,7 +26,6 @@ unsigned int __sw_hweight32(unsigned int w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight32);
-#endif
 
 unsigned int __sw_hweight16(unsigned int w)
 {
@@ -46,7 +44,6 @@ unsigned int __sw_hweight8(unsigned int w)
 }
 EXPORT_SYMBOL(__sw_hweight8);
 
-#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned long __sw_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
@@ -69,4 +66,3 @@ unsigned long __sw_hweight64(__u64 w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight64);
-#endif

* [GIT PULL] x86 fixes
@ 2019-04-27 14:42 Ingo Molnar
  2019-04-27 18:45 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-04-27 14:42 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 0d02113b31b2017dd349ec9df2314e798a90fa6e x86/mm: Fix a crash with kmemleak_scan()

Two fixes:

 - Fix an early boot crash in the RSDP parsing code by effectively
   turning off the parsing call - we ran out of time for the proper fix
   but wanted to address the regression; the more involved fix is still
   being worked on.

 - Fix a crash that can be triggered in the kmemleak code.

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/boot: Disable RSDP parsing temporarily

Qian Cai (1):
      x86/mm: Fix a crash with kmemleak_scan()


 arch/x86/boot/compressed/misc.c | 2 +-
 arch/x86/mm/init.c              | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c560df69..5a237e8dbf8d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	boot_params->hdr.loadflags &= ~KASLR_FLAG;
 
 	/* Save RSDP address for later use. */
-	boot_params->acpi_rsdp_addr = get_rsdp_addr();
+	/* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */
 
 	sanitize_boot_params(boot_params);
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f905a2371080..8dacdb96899e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -5,6 +5,7 @@
 #include <linux/memblock.h>
 #include <linux/swapfile.h>
 #include <linux/swapops.h>
+#include <linux/kmemleak.h>
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
 	if (debug_pagealloc_enabled()) {
 		pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
 			begin, end - 1);
+		/*
+		 * Inform kmemleak about the hole in the memory since the
+		 * corresponding pages will be unmapped.
+		 */
+		kmemleak_free_part((void *)begin, end - begin);
 		set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
 	} else {
 		/*

* [GIT PULL] x86 fixes
@ 2019-04-20  7:38 Ingo Molnar
  2019-04-20 19:25 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-04-20  7:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 2ee27796f298b710992a677a7e4d35c8c588b17e x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority

Misc fixes all over the place: a console spam fix, section-attribute
fixes, a KASLR fix, a TLB stack-variable alignment fix, a reboot quirk, a
fix for bogus warnings with certain boot options, an LTO fix, a deadlock
fix and an RDT fix.

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      x86/cpu/bugs: Use __initconst for 'const' init data

Baoquan He (1):
      x86/mm/KASLR: Fix the size of the direct mapping section

Colin Ian King (1):
      x86/Kconfig: Fix spelling mistake "effectivness" -> "effectiveness"

Hans de Goede (1):
      x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority

Jian-Hong Pan (1):
      x86/reboot, efi: Use EFI reboot for Acer TravelMate X514-51T

Peter Zijlstra (1):
      x86/mm/tlb: Revert "x86/mm: Align TLB invalidation info"

Sami Tolvanen (1):
      x86/build/lto: Fix truncated .bss with -fdata-sections

Thomas Gleixner (2):
      x86/speculation: Prevent deadlock on ssb_state::lock
      x86/mm: Prevent bogus warnings with "noexec=off"

Xiaochen Shen (1):
      x86/resctrl: Do not repeat rdtgroup mode initialization


 arch/x86/Kconfig                       |  2 +-
 arch/x86/kernel/cpu/bugs.c             |  6 +++---
 arch/x86/kernel/cpu/intel.c            |  4 ++--
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |  3 ++-
 arch/x86/kernel/process.c              |  8 ++++++--
 arch/x86/kernel/reboot.c               | 21 +++++++++++++++++++++
 arch/x86/kernel/vmlinux.lds.S          |  2 +-
 arch/x86/mm/dump_pagetables.c          |  3 ++-
 arch/x86/mm/ioremap.c                  |  2 +-
 arch/x86/mm/kaslr.c                    |  2 +-
 arch/x86/mm/tlb.c                      |  2 +-
 include/linux/efi.h                    |  7 ++++++-
 12 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1f9b3cf437c..01dbb05bc498 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1499,7 +1499,7 @@ config X86_CPA_STATISTICS
 	depends on DEBUG_FS
 	---help---
 	  Expose statistics about the Change Page Attribute mechanims, which
-	  helps to determine the effectivness of preserving large and huge
+	  helps to determine the effectiveness of preserving large and huge
 	  page mappings when mapping protections are changed.
 
 config ARCH_HAS_MEM_ENCRYPT
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 2da82eff0eb4..b91b3bfa5cfb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -275,7 +275,7 @@ static const struct {
 	const char			*option;
 	enum spectre_v2_user_cmd	cmd;
 	bool				secure;
-} v2_user_options[] __initdata = {
+} v2_user_options[] __initconst = {
 	{ "auto",		SPECTRE_V2_USER_CMD_AUTO,		false },
 	{ "off",		SPECTRE_V2_USER_CMD_NONE,		false },
 	{ "on",			SPECTRE_V2_USER_CMD_FORCE,		true  },
@@ -419,7 +419,7 @@ static const struct {
 	const char *option;
 	enum spectre_v2_mitigation_cmd cmd;
 	bool secure;
-} mitigation_options[] __initdata = {
+} mitigation_options[] __initconst = {
 	{ "off",		SPECTRE_V2_CMD_NONE,		  false },
 	{ "on",			SPECTRE_V2_CMD_FORCE,		  true  },
 	{ "retpoline",		SPECTRE_V2_CMD_RETPOLINE,	  false },
@@ -658,7 +658,7 @@ static const char * const ssb_strings[] = {
 static const struct {
 	const char *option;
 	enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[]  __initdata = {
+} ssb_mitigation_options[]  __initconst = {
 	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },    /* Platform decides */
 	{ "on",		SPEC_STORE_BYPASS_CMD_ON },      /* Disable Speculative Store Bypass */
 	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },    /* Don't touch Speculative Store Bypass */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fc3c07fe7df5..3142fd7a9b32 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -611,8 +611,8 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c)
 	if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
 		return;
 
-	pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
-	pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
+	pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+	pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
 	epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
 	wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
 }
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 54b9eef3eea9..85212a32b54d 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2610,9 +2610,10 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 			rdt_last_cmd_puts("Failed to initialize allocations\n");
 			return ret;
 		}
-		rdtgrp->mode = RDT_MODE_SHAREABLE;
 	}
 
+	rdtgrp->mode = RDT_MODE_SHAREABLE;
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 58ac7be52c7a..957eae13b370 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -426,6 +426,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
 	u64 msr = x86_spec_ctrl_base;
 	bool updmsr = false;
 
+	lockdep_assert_irqs_disabled();
+
 	/*
 	 * If TIF_SSBD is different, select the proper mitigation
 	 * method. Note that if SSBD mitigation is disabled or permanentely
@@ -477,10 +479,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
 
 void speculation_ctrl_update(unsigned long tif)
 {
+	unsigned long flags;
+
 	/* Forced update. Make sure all relevant TIF flags are different */
-	preempt_disable();
+	local_irq_save(flags);
 	__speculation_ctrl_update(~tif, tif);
-	preempt_enable();
+	local_irq_restore(flags);
 }
 
 /* Called from seccomp/prctl update */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 725624b6c0c0..8fd3cedd9acc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
 	return 0;
 }
 
+/*
+ * Some machines don't handle the default ACPI reboot method and
+ * require the EFI reboot method:
+ */
+static int __init set_efi_reboot(const struct dmi_system_id *d)
+{
+	if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) {
+		reboot_type = BOOT_EFI;
+		pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident);
+	}
+	return 0;
+}
+
 void __noreturn machine_real_restart(unsigned int type)
 {
 	local_irq_disable();
@@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
 		},
 	},
+	{	/* Handle reboot issue on Acer TravelMate X514-51T */
+		.callback = set_efi_reboot,
+		.ident = "Acer TravelMate X514-51T",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"),
+		},
+	},
 
 	/* Apple */
 	{	/* Handle problems with rebooting on Apple MacBook5 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bad8c51fee6e..a5127b2c195f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -362,7 +362,7 @@ SECTIONS
 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
 		__bss_start = .;
 		*(.bss..page_aligned)
-		*(.bss)
+		*(BSS_MAIN)
 		BSS_DECRYPTED
 		. = ALIGN(PAGE_SIZE);
 		__bss_stop = .;
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ee8f8ab46941..c0309ea9abee 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st)
 #endif
 	/* Account the WX pages */
 	st->wx_pages += npages;
-	WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n",
+	WARN_ONCE(__supported_pte_mask & _PAGE_NX,
+		  "x86/mm: Found insecure W+X mapping at address %pS\n",
 		  (void *)st->start_address);
 }
 
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0029604af8a4..dd73d5d74393 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
 	pte = early_ioremap_pte(addr);
 
 	/* Sanitize 'prot' against any unsupported bits: */
-	pgprot_val(flags) &= __default_kernel_pte_mask;
+	pgprot_val(flags) &= __supported_pte_mask;
 
 	if (pgprot_val(flags))
 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 3f452ffed7e9..d669c5e797e0 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void)
 	if (!kaslr_memory_enabled())
 		return;
 
-	kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+	kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT);
 	kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
 
 	/*
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bc4bc7b2f075..487b8474c01c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -728,7 +728,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 {
 	int cpu;
 
-	struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
+	struct flush_tlb_info info = {
 		.mm = mm,
 		.stride_shift = stride_shift,
 		.freed_tables = freed_tables,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 54357a258b35..6ebc2098cfe1 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1611,7 +1611,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
 			   struct screen_info *si, efi_guid_t *proto,
 			   unsigned long size);
 
-bool efi_runtime_disabled(void);
+#ifdef CONFIG_EFI
+extern bool efi_runtime_disabled(void);
+#else
+static inline bool efi_runtime_disabled(void) { return true; }
+#endif
+
 extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
 extern unsigned long efi_call_virt_save_flags(void);
 

* [GIT PULL] x86 fixes
@ 2019-04-12 13:10 Ingo Molnar
  2019-04-13  4:05 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-04-12 13:10 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 x86/asm: Use stricter assembly constraints in bitops

Fix typos in the user-visible resctrl mba_sc mount option, and fix assembly 
constraint bugs in the bitops code that could result in miscompilation.
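
For context on the second item: a "+m" output operand tells GCC that the
asm reads and writes only that one long, but BTS with a bit offset beyond
the width of the addressed long can touch memory outside of it, so the
compiler could legitimately cache or reorder accesses around the asm.
Passing the location as an "m" input and declaring a "memory" clobber
makes GCC treat all memory as potentially modified. A minimal user-space
sketch of the post-fix constraint style (x86-64 only, hypothetical name,
not the kernel's exact code):

    static inline void sketch_set_bit(long nr, volatile unsigned long *addr)
    {
            /*
             * "m" input + "memory" clobber: the compiler must assume the
             * asm may read or write any memory, which is what BTS with an
             * arbitrary bit offset can actually do.
             */
            asm volatile("btsq %1,%0"
                         : : "m" (*(volatile long *)addr), "Ir" (nr)
                         : "memory");
    }

The resctrl side of the pull fixes the spelling of the mount option, so
it now matches what the documentation advertises, i.e. something like:

    mount -t resctrl -o mba_MBps resctrl /sys/fs/resctrl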

 Thanks,

	Ingo

------------------>
Alexander Potapenko (1):
      x86/asm: Use stricter assembly constraints in bitops

Xiaochen Shen (1):
      x86/resctrl: Fix typos in the mba_sc mount option


 arch/x86/include/asm/bitops.h          | 41 +++++++++++++++-------------------
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |  6 ++---
 2 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index d153d570bb04..8e790ec219a5 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,16 +36,17 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define RLONG_ADDR(x)			 "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x)			"+m" (*(volatile char *) (x))
 
-#define ADDR				BITOP_ADDR(addr)
+#define ADDR				RLONG_ADDR(addr)
 
 /*
  * We do the locked ops that don't return the old value as
  * a mask operation on a byte.
  */
 #define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr)	WBYTE_ADDR((void *)(addr) + ((nr)>>3))
 #define CONST_MASK(nr)			(1 << ((nr) & 7))
 
 /**
@@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
 			: "memory");
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
-			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+	asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 /**
@@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
 			: "iq" ((u8)~CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
-			: BITOP_ADDR(addr)
-			: "Ir" (nr));
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
 
 static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+	asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
 	bool negative;
 	asm volatile(LOCK_PREFIX "andb %2,%1"
 		CC_SET(s)
-		: CC_OUT(s) (negative), ADDR
+		: CC_OUT(s) (negative), WBYTE_ADDR(addr)
 		: "ir" ((char) ~(1 << nr)) : "memory");
 	return negative;
 }
@@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
  * __clear_bit() is non-atomic and implies release semantics before the memory
  * operation. It can be used for an unlock if no other CPUs can concurrently
  * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
  */
 static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
-	barrier();
 	__clear_bit(nr, addr);
 }
 
@@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
  */
 static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+	asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 /**
@@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
 			: "iq" ((u8)CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
-			: BITOP_ADDR(addr)
-			: "Ir" (nr));
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
 
 	asm(__ASM_SIZE(bts) " %2,%1"
 	    CC_SET(c)
-	    : CC_OUT(c) (oldbit), ADDR
-	    : "Ir" (nr));
+	    : CC_OUT(c) (oldbit)
+	    : ADDR, "Ir" (nr) : "memory");
 	return oldbit;
 }
 
@@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
 
 	asm volatile(__ASM_SIZE(btr) " %2,%1"
 		     CC_SET(c)
-		     : CC_OUT(c) (oldbit), ADDR
-		     : "Ir" (nr));
+		     : CC_OUT(c) (oldbit)
+		     : ADDR, "Ir" (nr) : "memory");
 	return oldbit;
 }
 
@@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
 
 	asm volatile(__ASM_SIZE(btc) " %2,%1"
 		     CC_SET(c)
-		     : CC_OUT(c) (oldbit), ADDR
-		     : "Ir" (nr) : "memory");
+		     : CC_OUT(c) (oldbit)
+		     : ADDR, "Ir" (nr) : "memory");
 
 	return oldbit;
 }
@@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
 	asm volatile(__ASM_SIZE(bt) " %2,%1"
 		     CC_SET(c)
 		     : CC_OUT(c) (oldbit)
-		     : "m" (*(unsigned long *)addr), "Ir" (nr));
+		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
 
 	return oldbit;
 }
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 399601eda8e4..54b9eef3eea9 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2039,14 +2039,14 @@ static int rdt_get_tree(struct fs_context *fc)
 enum rdt_param {
 	Opt_cdp,
 	Opt_cdpl2,
-	Opt_mba_mpbs,
+	Opt_mba_mbps,
 	nr__rdt_params
 };
 
 static const struct fs_parameter_spec rdt_param_specs[] = {
 	fsparam_flag("cdp",		Opt_cdp),
 	fsparam_flag("cdpl2",		Opt_cdpl2),
-	fsparam_flag("mba_mpbs",	Opt_mba_mpbs),
+	fsparam_flag("mba_MBps",	Opt_mba_mbps),
 	{}
 };
 
@@ -2072,7 +2072,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_cdpl2:
 		ctx->enable_cdpl2 = true;
 		return 0;
-	case Opt_mba_mpbs:
+	case Opt_mba_mbps:
 		if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 			return -EINVAL;
 		ctx->enable_mba_mbps = true;

* [GIT PULL] x86 fixes
@ 2019-02-17 10:19 Ingo Molnar
  2019-02-17 16:50 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-02-17 10:19 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: f331e766c4be33f4338574f3c9f7f77e98ab4571 x86/platform/UV: Use efi_runtime_lock to serialise BIOS calls

Three changes:

 - A UV fix/quirk to pull UV BIOS calls into the efi_runtime_lock
   locking regime. (This is done by aliasing __efi_uv_runtime_lock to
   efi_runtime_lock, which should make the quirk nature obvious and
   maintain the general policy that the EFI lock name isn't exposed
   to drivers; the alias technique is sketched below.)

 - Our version of MAGA: Make a.out Great Again.

 - Add a new Intel model name enumerator to an upstream header to help 
   reduce dependencies going forward.
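
The aliasing trick in the first item can be sketched in plain GCC C
(illustrative names, not the kernel's code): the alias attribute makes a
second symbol name refer to the same object, so the real lock keeps
internal linkage while the quirk site links against the alias.

    #include <stdio.h>

    /* the "real" object keeps internal linkage */
    static int internal_lock = 1;

    /*
     * A second, quirk-specific name for the very same object; with GCC,
     * an extern declaration carrying the alias attribute acts as a
     * definition in this translation unit.
     */
    extern int quirk_lock __attribute__((alias("internal_lock")));

    int main(void)
    {
            quirk_lock = 2;
            printf("%d\n", internal_lock);  /* prints 2 */
            return 0;
    }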


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/firmware/efi/runtime-wrappers.c# f331e766c4be: x86/platform/UV: Use efi_run

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/a.out: Clear the dump structure initially

Hedi Berriche (1):
      x86/platform/UV: Use efi_runtime_lock to serialise BIOS calls

Rajneesh Bhardwaj (1):
      x86/CPU: Add Icelake model number


 arch/x86/ia32/ia32_aout.c               |  6 ++++--
 arch/x86/include/asm/intel-family.h     |  2 ++
 arch/x86/include/asm/uv/bios.h          |  8 +++++++-
 arch/x86/platform/uv/bios_uv.c          | 23 +++++++++++++++++++++--
 drivers/firmware/efi/runtime-wrappers.c |  7 +++++++
 5 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index f65b78d32f5e..7dbbe9ffda17 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -51,7 +51,7 @@ static unsigned long get_dr(int n)
 /*
  * fill in the user structure for a core dump..
  */
-static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
+static void fill_dump(struct pt_regs *regs, struct user32 *dump)
 {
 	u32 fs, gs;
 	memset(dump, 0, sizeof(*dump));
@@ -157,10 +157,12 @@ static int aout_core_dump(struct coredump_params *cprm)
 	fs = get_fs();
 	set_fs(KERNEL_DS);
 	has_dumped = 1;
+
+	fill_dump(cprm->regs, &dump);
+
 	strncpy(dump.u_comm, current->comm, sizeof(current->comm));
 	dump.u_ar0 = offsetof(struct user32, regs);
 	dump.signal = cprm->siginfo->si_signo;
-	dump_thread32(cprm->regs, &dump);
 
 	/*
 	 * If the size of the dump file exceeds the rlimit, then see
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index d9a9993af882..9f15384c504a 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -52,6 +52,8 @@
 
 #define INTEL_FAM6_CANNONLAKE_MOBILE	0x66
 
+#define INTEL_FAM6_ICELAKE_MOBILE	0x7E
+
 /* "Small Core" Processors (Atom) */
 
 #define INTEL_FAM6_ATOM_BONNELL		0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index e652a7cc6186..3f697a9e3f59 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -48,7 +48,8 @@ enum {
 	BIOS_STATUS_SUCCESS		=  0,
 	BIOS_STATUS_UNIMPLEMENTED	= -ENOSYS,
 	BIOS_STATUS_EINVAL		= -EINVAL,
-	BIOS_STATUS_UNAVAIL		= -EBUSY
+	BIOS_STATUS_UNAVAIL		= -EBUSY,
+	BIOS_STATUS_ABORT		= -EINTR,
 };
 
 /* Address map parameters */
@@ -167,4 +168,9 @@ extern long system_serial_number;
 
 extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
 
+/*
+ * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details
+ */
+extern struct semaphore __efi_uv_runtime_lock;
+
 #endif /* _ASM_X86_UV_BIOS_H */
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 4a6a5a26c582..eb33432f2f24 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -29,7 +29,8 @@
 
 struct uv_systab *uv_systab;
 
-s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+			u64 a4, u64 a5)
 {
 	struct uv_systab *tab = uv_systab;
 	s64 ret;
@@ -51,6 +52,19 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 
 	return ret;
 }
+
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+{
+	s64 ret;
+
+	if (down_interruptible(&__efi_uv_runtime_lock))
+		return BIOS_STATUS_ABORT;
+
+	ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
+	up(&__efi_uv_runtime_lock);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(uv_bios_call);
 
 s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
@@ -59,10 +73,15 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
 	unsigned long bios_flags;
 	s64 ret;
 
+	if (down_interruptible(&__efi_uv_runtime_lock))
+		return BIOS_STATUS_ABORT;
+
 	local_irq_save(bios_flags);
-	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+	ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
 	local_irq_restore(bios_flags);
 
+	up(&__efi_uv_runtime_lock);
+
 	return ret;
 }
 
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 8903b9ccfc2b..e2abfdb5cee6 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -146,6 +146,13 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
  */
 static DEFINE_SEMAPHORE(efi_runtime_lock);
 
+/*
+ * Expose the EFI runtime lock to the UV platform
+ */
+#ifdef CONFIG_X86_UV
+extern struct semaphore __efi_uv_runtime_lock __alias(efi_runtime_lock);
+#endif
+
 /*
  * Calls the appropriate efi_runtime_service() with the appropriate
  * arguments.

* [GIT PULL] x86 fixes
@ 2019-02-10  9:13 Ingo Molnar
  2019-02-10 18:30 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-02-10  9:13 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 20e55bc17dd01f13cec0eb17e76e9511b23963ef x86/mm: Make set_pmd_at() paravirt aware

A handful of fixes:

 - Fix an MCE corner case bug/crash found via MCE injection testing

 - Fix a 5-level paging boot crash

 - Fix an MCE recovery cache invalidation bug (the canonical-address
   fixup it relies on is sketched below)

 - Fix a regression on Xen guests caused by a recent PMD-level mremap
   speedup optimization
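
The cache invalidation fix builds on the canonical-address trick that
the patch below documents in fix_addr(); a stand-alone user-space sketch
(assumes GCC's arithmetic right shift of signed values, as the kernel
does):

    #include <stdio.h>

    /*
     * Mirrors fix_addr() from the patch below: shifting bit 63 out and
     * arithmetically shifting back sign-extends bit 62 into bit 63, so a
     * top-bit-flipped "decoy" address becomes canonical again while an
     * already-canonical address passes through unchanged.
     */
    static unsigned long fix_addr(unsigned long addr)
    {
            return (long)(addr << 1) >> 1;
    }

    int main(void)
    {
            unsigned long canonical = 0xffff888012345000UL;
            unsigned long decoy     = canonical & ~(1UL << 63);

            printf("%lx\n", fix_addr(decoy));      /* ffff888012345000 */
            printf("%lx\n", fix_addr(canonical));  /* ffff888012345000 */
            return 0;
    }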

 Thanks,

	Ingo

------------------>
Juergen Gross (1):
      x86/mm: Make set_pmd_at() paravirt aware

Kirill A. Shutemov (1):
      x86/boot/compressed/64: Do not corrupt EDX on EFER.LME=1 setting

Peter Zijlstra (1):
      x86/mm/cpa: Fix set_mce_nospec()

Tony Luck (1):
      x86/MCE: Initialize mce.bank in the case of a fatal error in mce_no_way_out()


 arch/x86/boot/compressed/head_64.S |  2 ++
 arch/x86/include/asm/pgtable.h     |  2 +-
 arch/x86/kernel/cpu/mce/core.c     |  1 +
 arch/x86/mm/pageattr.c             | 50 +++++++++++++++++++-------------------
 4 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f105ae8651c9..f62e347862cc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -602,10 +602,12 @@ ENTRY(trampoline_32bit_src)
 3:
 	/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
 	pushl	%ecx
+	pushl	%edx
 	movl	$MSR_EFER, %ecx
 	rdmsr
 	btsl	$_EFER_LME, %eax
 	wrmsr
+	popl	%edx
 	popl	%ecx
 
 	/* Enable PAE and LA57 (if required) paging modes */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 40616e805292..2779ace16d23 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t pmd)
 {
-	native_set_pmd(pmdp, pmd);
+	set_pmd(pmdp, pmd);
 }
 
 static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 672c7225cb1b..6ce290c506d9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			quirk_no_way_out(i, m, regs);
 
 		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+			m->bank = i;
 			mce_read_aux(m, i);
 			*msg = tmp;
 			return 1;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4f8972311a77..14e6119838a6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn)
 
 #endif
 
+/*
+ * See set_mce_nospec().
+ *
+ * Machine check recovery code needs to change cache mode of poisoned pages to
+ * UC to avoid speculative access logging another error. But passing the
+ * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
+ * speculative access. So we cheat and flip the top bit of the address. This
+ * works fine for the code that updates the page tables. But at the end of the
+ * process we need to flush the TLB and cache and the non-canonical address
+ * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long fix_addr(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+	return (long)(addr << 1) >> 1;
+#else
+	return addr;
+#endif
+}
+
 static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
 {
 	if (cpa->flags & CPA_PAGES_ARRAY) {
@@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data)
 	unsigned int i;
 
 	for (i = 0; i < cpa->numpages; i++)
-		__flush_tlb_one_kernel(__cpa_addr(cpa, i));
+		__flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
 }
 
 static void cpa_flush(struct cpa_data *data, int cache)
@@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
 		 * Only flush present addresses:
 		 */
 		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
-			clflush_cache_range_opt((void *)addr, PAGE_SIZE);
+			clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
 	}
 	mb();
 }
@@ -1627,29 +1650,6 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 	return ret;
 }
 
-/*
- * Machine check recovery code needs to change cache mode of poisoned
- * pages to UC to avoid speculative access logging another error. But
- * passing the address of the 1:1 mapping to set_memory_uc() is a fine
- * way to encourage a speculative access. So we cheat and flip the top
- * bit of the address. This works fine for the code that updates the
- * page tables. But at the end of the process we need to flush the cache
- * and the non-canonical address causes a #GP fault when used by the
- * CLFLUSH instruction.
- *
- * But in the common case we already have a canonical address. This code
- * will fix the top bit if needed and is a no-op otherwise.
- */
-static inline unsigned long make_addr_canonical_again(unsigned long addr)
-{
-#ifdef CONFIG_X86_64
-	return (long)(addr << 1) >> 1;
-#else
-	return addr;
-#endif
-}
-
-
 static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr,
 				    int force_split, int in_flag,

* [GIT PULL] x86 fixes
@ 2019-01-11  7:14 Ingo Molnar
  2019-01-11 18:00 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2019-01-11  7:14 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: e4f358916d528d479c3c12bd2fd03f2d5a576380 x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE

A 32-bit build fix, CONFIG_RETPOLINE fixes, and a rename of CONFIG_RESCTRL
to CONFIG_X86_RESCTRL.
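
On the CONFIG_RETPOLINE side: the bare RETPOLINE macro used to be a -D
flag added by the Makefile only when the compiler supported retpolines;
once that check moved into Kconfig, leftover '#ifdef RETPOLINE' guards
tested a macro that is no longer defined anywhere, and the preprocessor
disables such blocks silently. A user-space sketch of the failure mode
(hypothetical, for illustration only):

    #include <stdio.h>

    /* stands in for include/generated/autoconf.h */
    #define CONFIG_RETPOLINE 1

    /* a stale guard: nothing defines RETPOLINE any more, so the block
     * is compiled out without any diagnostic */
    #ifdef RETPOLINE
    # define GUARD "enabled"
    #else
    # define GUARD "silently disabled"
    #endif

    int main(void)
    {
            printf("old RETPOLINE guard: %s\n", GUARD);
            return 0;
    }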

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/module.h             # e4f358916d52: x86, modpost: Replace last r
  samples/seccomp/Makefile           # a77d1d196bc6: samples/seccomp: Fix 32-bit 
  scripts/mod/modpost.c              # e4f358916d52: x86, modpost: Replace last r

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/cache: Rename config option to CONFIG_X86_RESCTRL

Tycho Andersen (1):
      samples/seccomp: Fix 32-bit build

WANG Chao (1):
      x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE


 Documentation/x86/resctrl_ui.txt     | 2 +-
 arch/x86/Kconfig                     | 2 +-
 arch/x86/include/asm/resctrl_sched.h | 4 ++--
 arch/x86/kernel/cpu/Makefile         | 2 +-
 arch/x86/kernel/cpu/bugs.c           | 2 +-
 arch/x86/kernel/cpu/resctrl/Makefile | 4 ++--
 include/linux/compiler-gcc.h         | 2 +-
 include/linux/module.h               | 2 +-
 include/linux/sched.h                | 2 +-
 samples/seccomp/Makefile             | 1 +
 scripts/mod/modpost.c                | 2 +-
 11 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/Documentation/x86/resctrl_ui.txt b/Documentation/x86/resctrl_ui.txt
index d9aed8303984..e8e8d14d3c4e 100644
--- a/Documentation/x86/resctrl_ui.txt
+++ b/Documentation/x86/resctrl_ui.txt
@@ -9,7 +9,7 @@ Fenghua Yu <fenghua.yu@intel.com>
 Tony Luck <tony.luck@intel.com>
 Vikas Shivappa <vikas.shivappa@intel.com>
 
-This feature is enabled by the CONFIG_RESCTRL and the X86 /proc/cpuinfo
+This feature is enabled by the CONFIG_X86_RESCTRL and the x86 /proc/cpuinfo
 flag bits:
 RDT (Resource Director Technology) Allocation - "rdt_a"
 CAT (Cache Allocation Technology) - "cat_l3", "cat_l2"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6185d4f33296..15af091611e2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -446,7 +446,7 @@ config RETPOLINE
 	  branches. Requires a compiler with -mindirect-branch=thunk-extern
 	  support for full protection. The kernel may run slower.
 
-config RESCTRL
+config X86_RESCTRL
 	bool "Resource Control support"
 	depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
 	select KERNFS
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h
index 54990fe2a3ae..40ebddde6ac2 100644
--- a/arch/x86/include/asm/resctrl_sched.h
+++ b/arch/x86/include/asm/resctrl_sched.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_RESCTRL_SCHED_H
 #define _ASM_X86_RESCTRL_SCHED_H
 
-#ifdef CONFIG_RESCTRL
+#ifdef CONFIG_X86_RESCTRL
 
 #include <linux/sched.h>
 #include <linux/jump_label.h>
@@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void)
 
 static inline void resctrl_sched_in(void) {}
 
-#endif /* CONFIG_RESCTRL */
+#endif /* CONFIG_X86_RESCTRL */
 
 #endif /* _ASM_X86_RESCTRL_SCHED_H */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ac78f90aea56..b6fa0869f7aa 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 obj-$(CONFIG_X86_MCE)			+= mce/
 obj-$(CONFIG_MTRR)			+= mtrr/
 obj-$(CONFIG_MICROCODE)			+= microcode/
-obj-$(CONFIG_RESCTRL)			+= resctrl/
+obj-$(CONFIG_X86_RESCTRL)		+= resctrl/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 8654b8b0c848..1de0f4170178 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -215,7 +215,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
 static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
 	SPECTRE_V2_USER_NONE;
 
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
 static bool spectre_v2_bad_module;
 
 bool retpoline_module_ok(bool has_retpoline)
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 6895049ceef7..1cabe6fd8e11 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_RESCTRL)	+= core.o rdtgroup.o monitor.o
-obj-$(CONFIG_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
+obj-$(CONFIG_X86_RESCTRL)	+= core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
 CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5776da43da97..dd8268f5f5f0 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -68,7 +68,7 @@
  */
 #define uninitialized_var(x) x = x
 
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
 #define __noretpoline __attribute__((__indirect_branch__("keep")))
 #endif
 
diff --git a/include/linux/module.h b/include/linux/module.h
index 9a21fe3509af..8fa38d3e7538 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -828,7 +828,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
 static inline void module_bug_cleanup(struct module *mod) {}
 #endif	/* CONFIG_GENERIC_BUG */
 
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
 extern bool retpoline_module_ok(bool has_retpoline);
 #else
 static inline bool retpoline_module_ok(bool has_retpoline)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89541d248893..224666226e87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -995,7 +995,7 @@ struct task_struct {
 	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
 	struct list_head		cg_list;
 #endif
-#ifdef CONFIG_RESCTRL
+#ifdef CONFIG_X86_RESCTRL
 	u32				closid;
 	u32				rmid;
 #endif
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index 4920903c8009..fb43a814d4c0 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -34,6 +34,7 @@ HOSTCFLAGS_bpf-direct.o += $(MFLAG)
 HOSTCFLAGS_dropper.o += $(MFLAG)
 HOSTCFLAGS_bpf-helper.o += $(MFLAG)
 HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
+HOSTCFLAGS_user-trap.o += $(MFLAG)
 HOSTLDLIBS_bpf-direct += $(MFLAG)
 HOSTLDLIBS_bpf-fancy += $(MFLAG)
 HOSTLDLIBS_dropper += $(MFLAG)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 0de2fb236640..26bf886bd168 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2185,7 +2185,7 @@ static void add_intree_flag(struct buffer *b, int is_intree)
 /* Cannot check for assembler */
 static void add_retpoline(struct buffer *b)
 {
-	buf_printf(b, "\n#ifdef RETPOLINE\n");
+	buf_printf(b, "\n#ifdef CONFIG_RETPOLINE\n");
 	buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
 	buf_printf(b, "#endif\n");
 }

* [GIT PULL] x86 fixes
@ 2018-12-21 12:25 Ingo Molnar
  2018-12-21 19:30 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-12-21 12:25 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton, Masahiro Yamada

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 6ac389346e6964e1f6a1c675cebf8bd0912526a5 Revert "kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs"

The biggest part is a series of reverts of the macro-based GCC inlining 
workarounds, which caused regressions in distro builds and other kernel 
tooling environments. The GCC project was very receptive to fixing 
the underlying inliner weaknesses - so as time ran out we decided to do a 
reasonably straightforward revert of the patches. The plan is to rely on 
the 'asm inline' GCC 9 feature, which might be backported to GCC 8 and 
could thus become reasonably widely available on modern distros.
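
For reference, the 'asm inline' qualifier tells GCC's inliner to treat
the asm statement as having minimum size, instead of estimating its cost
from the length of the template string - the mis-estimation that
motivated the macro workarounds in the first place. A hypothetical
sketch (requires GCC 9 or a backport):

    /*
     * With a plain asm(), a long template string makes the containing
     * function look expensive to inline; the inline qualifier marks the
     * asm as minimum-size for inlining purposes.
     */
    static inline void sketch_nop(void)
    {
            asm inline volatile ("nop");
    }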

Other than those reverts, there's misc fixes from all around the place.

I wish our final x86 pull request for v4.20 was smaller...

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  Makefile                           # 6ac389346e69: Revert "kbuild/Makefile: Pre
  include/asm-generic/bug.h          # ffb61c6346d0: Revert "x86/bug: Macrofy the
  include/linux/compiler.h           # 96af6cd02a10: Revert "x86/objtool: Use asm
  scripts/Kbuild.include             # 6ac389346e69: Revert "kbuild/Makefile: Pre
  scripts/mod/Makefile               # 6ac389346e69: Revert "kbuild/Makefile: Pre

 Thanks,

	Ingo

------------------>
Alistair Strachan (1):
      x86/vdso: Pass --eh-frame-hdr to the linker

Chang S. Bae (1):
      x86/fsgsbase/64: Fix the base write helper functions

Colin Ian King (1):
      x86/mtrr: Don't copy uninitialized gentry fields back to userspace

Dan Williams (1):
      x86/mm: Fix decoy address handling vs 32-bit builds

Ingo Molnar (9):
      Revert "x86/jump-labels: Macrofy inline assembly code to work around GCC inlining bugs"
      Revert "x86/cpufeature: Macrofy inline assembly code to work around GCC inlining bugs"
      Revert "x86/extable: Macrofy inline assembly code to work around GCC inlining bugs"
      Revert "x86/paravirt: Work around GCC inlining bugs when compiling paravirt ops"
      Revert "x86/bug: Macrofy the BUG table section handling, to work around GCC inlining bugs"
      Revert "x86/alternatives: Macrofy lock prefixes to work around GCC inlining bugs"
      Revert "x86/refcount: Work around GCC inlining bug"
      Revert "x86/objtool: Use asm macros to work around GCC inlining bugs"
      Revert "kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs"

Kirill A. Shutemov (2):
      x86/mm: Fix guard hole handling
      x86/dump_pagetables: Fix LDT remap address marker

Peter Zijlstra (1):
      x86/mm/cpa: Fix cpa_flush_array() TLB invalidation

Reinette Chatre (1):
      x86/intel_rdt: Ensure a CPU remains online for the region's pseudo-locking sequence


 Makefile                                    |  9 +--
 arch/x86/Makefile                           |  7 --
 arch/x86/entry/calling.h                    |  2 +-
 arch/x86/entry/vdso/Makefile                |  3 +-
 arch/x86/include/asm/alternative-asm.h      | 20 ++----
 arch/x86/include/asm/alternative.h          | 11 +++-
 arch/x86/include/asm/asm.h                  | 53 +++++++++------
 arch/x86/include/asm/bug.h                  | 98 ++++++++++++----------------
 arch/x86/include/asm/cpufeature.h           | 82 ++++++++++--------------
 arch/x86/include/asm/fsgsbase.h             | 15 +++--
 arch/x86/include/asm/jump_label.h           | 72 +++++++++++++++------
 arch/x86/include/asm/paravirt_types.h       | 56 ++++++++--------
 arch/x86/include/asm/pgtable_64_types.h     |  5 ++
 arch/x86/include/asm/refcount.h             | 81 ++++++++++-------------
 arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c |  4 ++
 arch/x86/kernel/cpu/mtrr/if.c               |  2 +
 arch/x86/kernel/macros.S                    | 16 -----
 arch/x86/kernel/process_64.c                | 99 ++++++++++++++++-------------
 arch/x86/kernel/ptrace.c                    |  9 +--
 arch/x86/mm/dump_pagetables.c               | 15 ++---
 arch/x86/mm/pageattr.c                      | 24 ++++---
 arch/x86/mm/pat.c                           | 13 +++-
 arch/x86/xen/mmu_pv.c                       | 11 ++--
 include/asm-generic/bug.h                   |  8 +--
 include/linux/compiler.h                    | 56 ++++------------
 scripts/Kbuild.include                      |  4 +-
 scripts/mod/Makefile                        |  2 -
 27 files changed, 385 insertions(+), 392 deletions(-)
 delete mode 100644 arch/x86/kernel/macros.S

diff --git a/Makefile b/Makefile
index e9fd22c8445e..da4bb1e10388 100644
--- a/Makefile
+++ b/Makefile
@@ -1081,7 +1081,7 @@ scripts: scripts_basic scripts_dtc asm-generic gcc-plugins $(autoksyms_h)
 # version.h and scripts_basic is processed / created.
 
 # Listed in dependency order
-PHONY += prepare archprepare macroprepare prepare0 prepare1 prepare2 prepare3
+PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
 
 # prepare3 is used to check if we are building in a separate output directory,
 # and if so do:
@@ -1104,9 +1104,7 @@ prepare2: prepare3 outputmakefile asm-generic
 prepare1: prepare2 $(version_h) $(autoksyms_h) include/generated/utsrelease.h
 	$(cmd_crmodverdir)
 
-macroprepare: prepare1 archmacros
-
-archprepare: archheaders archscripts macroprepare scripts_basic
+archprepare: archheaders archscripts prepare1 scripts_basic
 
 prepare0: archprepare gcc-plugins
 	$(Q)$(MAKE) $(build)=.
@@ -1174,9 +1172,6 @@ archheaders:
 PHONY += archscripts
 archscripts:
 
-PHONY += archmacros
-archmacros:
-
 PHONY += __headers
 __headers: $(version_h) scripts_basic uapi-asm-generic archheaders archscripts
 	$(Q)$(MAKE) $(build)=scripts build_unifdef
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 75ef499a66e2..85a66c4a8b65 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,13 +232,6 @@ archscripts: scripts_basic
 archheaders:
 	$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
 
-archmacros:
-	$(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
-
-ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s
-export ASM_MACRO_FLAGS
-KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
-
 ###
 # Kernel objects
 
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 25e5a6bda8c3..20d0885b00fb 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -352,7 +352,7 @@ For 32-bit we have the following conventions - kernel is built with
 .macro CALL_enter_from_user_mode
 #ifdef CONFIG_CONTEXT_TRACKING
 #ifdef HAVE_JUMP_LABEL
-	STATIC_BRANCH_JMP l_yes=.Lafter_call_\@, key=context_tracking_enabled, branch=1
+	STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
 #endif
 	call enter_from_user_mode
 .Lafter_call_\@:
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 0624bf2266fd..5bfe2243a08f 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -171,7 +171,8 @@ quiet_cmd_vdso = VDSO    $@
 		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
 
 VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
-	$(call ld-option, --build-id) -Bsymbolic
+	$(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
+	-Bsymbolic
 GCOV_PROFILE := n
 
 #
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 8e4ea39e55d0..31b627b43a8e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -7,24 +7,16 @@
 #include <asm/asm.h>
 
 #ifdef CONFIG_SMP
-.macro LOCK_PREFIX_HERE
+	.macro LOCK_PREFIX
+672:	lock
 	.pushsection .smp_locks,"a"
 	.balign 4
-	.long 671f - .		# offset
+	.long 672b - .
 	.popsection
-671:
-.endm
-
-.macro LOCK_PREFIX insn:vararg
-	LOCK_PREFIX_HERE
-	lock \insn
-.endm
+	.endm
 #else
-.macro LOCK_PREFIX_HERE
-.endm
-
-.macro LOCK_PREFIX insn:vararg
-.endm
+	.macro LOCK_PREFIX
+	.endm
 #endif
 
 /*
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index d7faa16622d8..4cd6a3b71824 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -31,8 +31,15 @@
  */
 
 #ifdef CONFIG_SMP
-#define LOCK_PREFIX_HERE "LOCK_PREFIX_HERE\n\t"
-#define LOCK_PREFIX "LOCK_PREFIX "
+#define LOCK_PREFIX_HERE \
+		".pushsection .smp_locks,\"a\"\n"	\
+		".balign 4\n"				\
+		".long 671f - .\n" /* offset */		\
+		".popsection\n"				\
+		"671:"
+
+#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
+
 #else /* ! CONFIG_SMP */
 #define LOCK_PREFIX_HERE ""
 #define LOCK_PREFIX ""
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 21b086786404..6467757bb39f 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -120,25 +120,12 @@
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)			\
-	ASM_EXTABLE_HANDLE from to handler
-
-.macro ASM_EXTABLE_HANDLE from:req to:req handler:req
-	.pushsection "__ex_table","a"
-	.balign 4
-	.long (\from) - .
-	.long (\to) - .
-	.long (\handler) - .
+	.pushsection "__ex_table","a" ;				\
+	.balign 4 ;						\
+	.long (from) - . ;					\
+	.long (to) - . ;					\
+	.long (handler) - . ;					\
 	.popsection
-.endm
-#else /* __ASSEMBLY__ */
-
-# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
-	"ASM_EXTABLE_HANDLE from=" #from " to=" #to		\
-	" handler=\"" #handler "\"\n\t"
-
-/* For C file, we already have NOKPROBE_SYMBOL macro */
-
-#endif /* __ASSEMBLY__ */
 
 # define _ASM_EXTABLE(from, to)					\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
@@ -161,7 +148,6 @@
 	_ASM_PTR (entry);					\
 	.popsection
 
-#ifdef __ASSEMBLY__
 .macro ALIGN_DESTINATION
 	/* check for bad alignment of destination */
 	movl %edi,%ecx
@@ -185,7 +171,34 @@
 	_ASM_EXTABLE_UA(100b, 103b)
 	_ASM_EXTABLE_UA(101b, 103b)
 	.endm
-#endif /* __ASSEMBLY__ */
+
+#else
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
+	" .pushsection \"__ex_table\",\"a\"\n"			\
+	" .balign 4\n"						\
+	" .long (" #from ") - .\n"				\
+	" .long (" #to ") - .\n"				\
+	" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n"	\
+	" .popsection\n"
+
+# define _ASM_EXTABLE(from, to)					\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_FAULT(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+
+# define _ASM_EXTABLE_REFCOUNT(from, to)			\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
+/* For C file, we already have NOKPROBE_SYMBOL macro */
+#endif
 
 #ifndef __ASSEMBLY__
 /*
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 5090035e6d16..6804d6642767 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -4,8 +4,6 @@
 
 #include <linux/stringify.h>
 
-#ifndef __ASSEMBLY__
-
 /*
  * Despite that some emulators terminate on UD2, we use it for WARN().
  *
@@ -22,15 +20,53 @@
 
 #define LEN_UD2		2
 
+#ifdef CONFIG_GENERIC_BUG
+
+#ifdef CONFIG_X86_32
+# define __BUG_REL(val)	".long " __stringify(val)
+#else
+# define __BUG_REL(val)	".long " __stringify(val) " - 2b"
+#endif
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define _BUG_FLAGS(ins, flags)						\
+do {									\
+	asm volatile("1:\t" ins "\n"					\
+		     ".pushsection __bug_table,\"aw\"\n"		\
+		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
+		     "\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"	\
+		     "\t.word %c1"        "\t# bug_entry::line\n"	\
+		     "\t.word %c2"        "\t# bug_entry::flags\n"	\
+		     "\t.org 2b+%c3\n"					\
+		     ".popsection"					\
+		     : : "i" (__FILE__), "i" (__LINE__),		\
+			 "i" (flags),					\
+			 "i" (sizeof(struct bug_entry)));		\
+} while (0)
+
+#else /* !CONFIG_DEBUG_BUGVERBOSE */
+
 #define _BUG_FLAGS(ins, flags)						\
 do {									\
-	asm volatile("ASM_BUG ins=\"" ins "\" file=%c0 line=%c1 "	\
-		     "flags=%c2 size=%c3"				\
-		     : : "i" (__FILE__), "i" (__LINE__),                \
-			 "i" (flags),                                   \
+	asm volatile("1:\t" ins "\n"					\
+		     ".pushsection __bug_table,\"aw\"\n"		\
+		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
+		     "\t.word %c0"        "\t# bug_entry::flags\n"	\
+		     "\t.org 2b+%c1\n"					\
+		     ".popsection"					\
+		     : : "i" (flags),					\
 			 "i" (sizeof(struct bug_entry)));		\
 } while (0)
 
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#else
+
+#define _BUG_FLAGS(ins, flags)  asm volatile(ins)
+
+#endif /* CONFIG_GENERIC_BUG */
+
 #define HAVE_ARCH_BUG
 #define BUG()							\
 do {								\
@@ -46,54 +82,4 @@ do {								\
 
 #include <asm-generic/bug.h>
 
-#else /* __ASSEMBLY__ */
-
-#ifdef CONFIG_GENERIC_BUG
-
-#ifdef CONFIG_X86_32
-.macro __BUG_REL val:req
-	.long \val
-.endm
-#else
-.macro __BUG_REL val:req
-	.long \val - 2b
-.endm
-#endif
-
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
-1:	\ins
-	.pushsection __bug_table,"aw"
-2:	__BUG_REL val=1b	# bug_entry::bug_addr
-	__BUG_REL val=\file	# bug_entry::file
-	.word \line		# bug_entry::line
-	.word \flags		# bug_entry::flags
-	.org 2b+\size
-	.popsection
-.endm
-
-#else /* !CONFIG_DEBUG_BUGVERBOSE */
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
-1:	\ins
-	.pushsection __bug_table,"aw"
-2:	__BUG_REL val=1b	# bug_entry::bug_addr
-	.word \flags		# bug_entry::flags
-	.org 2b+\size
-	.popsection
-.endm
-
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
-
-#else /* CONFIG_GENERIC_BUG */
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
-	\ins
-.endm
-
-#endif /* CONFIG_GENERIC_BUG */
-
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7d442722ef24..aced6c9290d6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -2,10 +2,10 @@
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
 #include <asm/processor.h>
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+
 #include <asm/asm.h>
 #include <linux/bitops.h>
 
@@ -161,10 +161,37 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-	asm_volatile_goto("STATIC_CPU_HAS bitnum=%[bitnum] "
-			  "cap_byte=\"%[cap_byte]\" "
-			  "feature=%P[feature] t_yes=%l[t_yes] "
-			  "t_no=%l[t_no] always=%P[always]"
+	asm_volatile_goto("1: jmp 6f\n"
+		 "2:\n"
+		 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+			 "((5f-4f) - (2b-1b)),0x90\n"
+		 "3:\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 4f - .\n"		/* repl offset */
+		 " .word %P[always]\n"		/* always replace */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 5f - 4f\n"		/* repl len */
+		 " .byte 3b - 2b\n"		/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_replacement,\"ax\"\n"
+		 "4: jmp %l[t_no]\n"
+		 "5:\n"
+		 ".previous\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 0\n"			/* no replacement */
+		 " .word %P[feature]\n"		/* feature bit */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 0\n"			/* repl len */
+		 " .byte 0\n"			/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_aux,\"ax\"\n"
+		 "6:\n"
+		 " testb %[bitnum],%[cap_byte]\n"
+		 " jnz %l[t_yes]\n"
+		 " jmp %l[t_no]\n"
+		 ".previous\n"
 		 : : [feature]  "i" (bit),
 		     [always]   "i" (X86_FEATURE_ALWAYS),
 		     [bitnum]   "i" (1 << (bit & 7)),
@@ -199,44 +226,5 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 #define CPU_FEATURE_TYPEVAL		boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
 					boot_cpu_data.x86_model
 
-#else /* __ASSEMBLY__ */
-
-.macro STATIC_CPU_HAS bitnum:req cap_byte:req feature:req t_yes:req t_no:req always:req
-1:
-	jmp 6f
-2:
-	.skip -(((5f-4f) - (2b-1b)) > 0) * ((5f-4f) - (2b-1b)),0x90
-3:
-	.section .altinstructions,"a"
-	.long 1b - .		/* src offset */
-	.long 4f - .		/* repl offset */
-	.word \always		/* always replace */
-	.byte 3b - 1b		/* src len */
-	.byte 5f - 4f		/* repl len */
-	.byte 3b - 2b		/* pad len */
-	.previous
-	.section .altinstr_replacement,"ax"
-4:
-	jmp \t_no
-5:
-	.previous
-	.section .altinstructions,"a"
-	.long 1b - .		/* src offset */
-	.long 0			/* no replacement */
-	.word \feature		/* feature bit */
-	.byte 3b - 1b		/* src len */
-	.byte 0			/* repl len */
-	.byte 0			/* pad len */
-	.previous
-	.section .altinstr_aux,"ax"
-6:
-	testb \bitnum,\cap_byte
-	jnz \t_yes
-	jmp \t_no
-	.previous
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
 #endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index eb377b6e9eed..bca4c743de77 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -16,8 +16,8 @@
  */
 extern unsigned long x86_fsbase_read_task(struct task_struct *task);
 extern unsigned long x86_gsbase_read_task(struct task_struct *task);
-extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
-extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
+extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
+extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
 
 /* Helper functions for reading/writing FS/GS base */
 
@@ -39,8 +39,15 @@ static inline unsigned long x86_gsbase_read_cpu_inactive(void)
 	return gsbase;
 }
 
-extern void x86_fsbase_write_cpu(unsigned long fsbase);
-extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+static inline void x86_fsbase_write_cpu(unsigned long fsbase)
+{
+	wrmsrl(MSR_FS_BASE, fsbase);
+}
+
+static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+}
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a5fb34fe56a4..21efc9d07ed9 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,6 +2,19 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
+#ifndef HAVE_JUMP_LABEL
+/*
+ * For better or for worse, if jump labels (the gcc extension) are missing,
+ * then the entire static branch patching infrastructure is compiled out.
+ * If that happens, the code in here will malfunction.  Raise a compiler
+ * error instead.
+ *
+ * In theory, jump labels and the static branch patching infrastructure
+ * could be decoupled to fix this.
+ */
+#error asm/jump_label.h included on a non-jump-label kernel
+#endif
+
 #define JUMP_LABEL_NOP_SIZE 5
 
 #ifdef CONFIG_X86_64
@@ -20,9 +33,15 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
-	asm_volatile_goto("STATIC_BRANCH_NOP l_yes=\"%l[l_yes]\" key=\"%c0\" "
-			  "branch=\"%c1\""
-			: :  "i" (key), "i" (branch) : : l_yes);
+	asm_volatile_goto("1:"
+		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		".long 1b - ., %l[l_yes] - . \n\t"
+		_ASM_PTR "%c0 + %c1 - .\n\t"
+		".popsection \n\t"
+		: :  "i" (key), "i" (branch) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
@@ -30,8 +49,14 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
 
 static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
-	asm_volatile_goto("STATIC_BRANCH_JMP l_yes=\"%l[l_yes]\" key=\"%c0\" "
-			  "branch=\"%c1\""
+	asm_volatile_goto("1:"
+		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
+		"2:\n\t"
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		".long 1b - ., %l[l_yes] - . \n\t"
+		_ASM_PTR "%c0 + %c1 - .\n\t"
+		".popsection \n\t"
 		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;
@@ -41,26 +66,37 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
 
 #else	/* __ASSEMBLY__ */
 
-.macro STATIC_BRANCH_NOP l_yes:req key:req branch:req
-.Lstatic_branch_nop_\@:
-	.byte STATIC_KEY_INIT_NOP
-.Lstatic_branch_no_after_\@:
+.macro STATIC_JUMP_IF_TRUE target, key, def
+.Lstatic_jump_\@:
+	.if \def
+	/* Equivalent to "jmp.d32 \target" */
+	.byte		0xe9
+	.long		\target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+	.else
+	.byte		STATIC_KEY_INIT_NOP
+	.endif
 	.pushsection __jump_table, "aw"
 	_ASM_ALIGN
-	.long		.Lstatic_branch_nop_\@ - ., \l_yes - .
-	_ASM_PTR        \key + \branch - .
+	.long		.Lstatic_jump_\@ - ., \target - .
+	_ASM_PTR	\key - .
 	.popsection
 .endm
 
-.macro STATIC_BRANCH_JMP l_yes:req key:req branch:req
-.Lstatic_branch_jmp_\@:
-	.byte 0xe9
-	.long \l_yes - .Lstatic_branch_jmp_after_\@
-.Lstatic_branch_jmp_after_\@:
+.macro STATIC_JUMP_IF_FALSE target, key, def
+.Lstatic_jump_\@:
+	.if \def
+	.byte		STATIC_KEY_INIT_NOP
+	.else
+	/* Equivalent to "jmp.d32 \target" */
+	.byte		0xe9
+	.long		\target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+	.endif
 	.pushsection __jump_table, "aw"
 	_ASM_ALIGN
-	.long		.Lstatic_branch_jmp_\@ - ., \l_yes - .
-	_ASM_PTR	\key + \branch - .
+	.long		.Lstatic_jump_\@ - ., \target - .
+	_ASM_PTR	\key + 1 - .
 	.popsection
 .endm
 
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 26942ad63830..488c59686a73 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -348,11 +348,23 @@ extern struct paravirt_patch_template pv_ops;
 #define paravirt_clobber(clobber)		\
 	[paravirt_clobber] "i" (clobber)
 
+/*
+ * Generate some code, and mark it as patchable by the
+ * apply_paravirt() alternate instruction patcher.
+ */
+#define _paravirt_alt(insn_string, type, clobber)	\
+	"771:\n\t" insn_string "\n" "772:\n"		\
+	".pushsection .parainstructions,\"a\"\n"	\
+	_ASM_ALIGN "\n"					\
+	_ASM_PTR " 771b\n"				\
+	"  .byte " type "\n"				\
+	"  .byte 772b-771b\n"				\
+	"  .short " clobber "\n"			\
+	".popsection\n"
+
 /* Generate patchable code, with the default asm parameters. */
-#define paravirt_call							\
-	"PARAVIRT_CALL type=\"%c[paravirt_typenum]\""			\
-	" clobber=\"%c[paravirt_clobber]\""				\
-	" pv_opptr=\"%c[paravirt_opptr]\";"
+#define paravirt_alt(insn_string)					\
+	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
 
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
@@ -372,6 +384,16 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
 
 int paravirt_disable_iospace(void);
 
+/*
+ * This generates an indirect call based on the operation type number.
+ * The type number, computed in PARAVIRT_PATCH, is derived from the
+ * offset into the paravirt_patch_template structure, and can therefore be
+ * freely converted back into a structure offset.
+ */
+#define PARAVIRT_CALL					\
+	ANNOTATE_RETPOLINE_SAFE				\
+	"call *%c[paravirt_opptr];"
+
 /*
  * These macros are intended to wrap calls through one of the paravirt
  * ops structs, so that they can be later identified and patched at
@@ -509,7 +531,7 @@ int paravirt_disable_iospace(void);
 		/* since this condition will never hold */		\
 		if (sizeof(rettype) > sizeof(unsigned long)) {		\
 			asm volatile(pre				\
-				     paravirt_call			\
+				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
 				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
@@ -519,7 +541,7 @@ int paravirt_disable_iospace(void);
 			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
 		} else {						\
 			asm volatile(pre				\
-				     paravirt_call			\
+				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
 				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
@@ -546,7 +568,7 @@ int paravirt_disable_iospace(void);
 		PVOP_VCALL_ARGS;					\
 		PVOP_TEST_NULL(op);					\
 		asm volatile(pre					\
-			     paravirt_call				\
+			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
 			     : call_clbr, ASM_CALL_CONSTRAINT		\
 			     : paravirt_type(op),			\
@@ -664,26 +686,6 @@ struct paravirt_patch_site {
 extern struct paravirt_patch_site __parainstructions[],
 	__parainstructions_end[];
 
-#else	/* __ASSEMBLY__ */
-
-/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
-.macro PARAVIRT_CALL type:req clobber:req pv_opptr:req
-771:	ANNOTATE_RETPOLINE_SAFE
-	call *\pv_opptr
-772:	.pushsection .parainstructions,"a"
-	_ASM_ALIGN
-	_ASM_PTR 771b
-	.byte \type
-	.byte 772b-771b
-	.short \clobber
-	.popsection
-.endm
-
 #endif	/* __ASSEMBLY__ */
 
 #endif	/* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 84bd9bdc1987..88bca456da99 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -111,6 +111,11 @@ extern unsigned int ptrs_per_p4d;
  */
 #define MAXMEM			(1UL << MAX_PHYSMEM_BITS)
 
+#define GUARD_HOLE_PGD_ENTRY	-256UL
+#define GUARD_HOLE_SIZE		(16UL << PGDIR_SHIFT)
+#define GUARD_HOLE_BASE_ADDR	(GUARD_HOLE_PGD_ENTRY << PGDIR_SHIFT)
+#define GUARD_HOLE_END_ADDR	(GUARD_HOLE_BASE_ADDR + GUARD_HOLE_SIZE)
+
 #define LDT_PGD_ENTRY		-240UL
 #define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #define LDT_END_ADDR		(LDT_BASE_ADDR + PGDIR_SIZE)
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index a8b5e1e13319..dbaed55c1c24 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -4,41 +4,6 @@
  * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
  * PaX/grsecurity.
  */
-
-#ifdef __ASSEMBLY__
-
-#include <asm/asm.h>
-#include <asm/bug.h>
-
-.macro REFCOUNT_EXCEPTION counter:req
-	.pushsection .text..refcount
-111:	lea \counter, %_ASM_CX
-112:	ud2
-	ASM_UNREACHABLE
-	.popsection
-113:	_ASM_EXTABLE_REFCOUNT(112b, 113b)
-.endm
-
-/* Trigger refcount exception if refcount result is negative. */
-.macro REFCOUNT_CHECK_LT_ZERO counter:req
-	js 111f
-	REFCOUNT_EXCEPTION counter="\counter"
-.endm
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-.macro REFCOUNT_CHECK_LE_ZERO counter:req
-	jz 111f
-	REFCOUNT_CHECK_LT_ZERO counter="\counter"
-.endm
-
-/* Trigger refcount exception unconditionally. */
-.macro REFCOUNT_ERROR counter:req
-	jmp 111f
-	REFCOUNT_EXCEPTION counter="\counter"
-.endm
-
-#else /* __ASSEMBLY__ */
-
 #include <linux/refcount.h>
 #include <asm/bug.h>
 
@@ -50,12 +15,35 @@
  * central refcount exception. The fixup address for the exception points
  * back to the regular execution flow in .text.
  */
+#define _REFCOUNT_EXCEPTION				\
+	".pushsection .text..refcount\n"		\
+	"111:\tlea %[var], %%" _ASM_CX "\n"		\
+	"112:\t" ASM_UD2 "\n"				\
+	ASM_UNREACHABLE					\
+	".popsection\n"					\
+	"113:\n"					\
+	_ASM_EXTABLE_REFCOUNT(112b, 113b)
+
+/* Trigger refcount exception if refcount result is negative. */
+#define REFCOUNT_CHECK_LT_ZERO				\
+	"js 111f\n\t"					\
+	_REFCOUNT_EXCEPTION
+
+/* Trigger refcount exception if refcount result is zero or negative. */
+#define REFCOUNT_CHECK_LE_ZERO				\
+	"jz 111f\n\t"					\
+	REFCOUNT_CHECK_LT_ZERO
+
+/* Trigger refcount exception unconditionally. */
+#define REFCOUNT_ERROR					\
+	"jmp 111f\n\t"					\
+	_REFCOUNT_EXCEPTION
 
 static __always_inline void refcount_add(unsigned int i, refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
-		"REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
-		: [counter] "+m" (r->refs.counter)
+		REFCOUNT_CHECK_LT_ZERO
+		: [var] "+m" (r->refs.counter)
 		: "ir" (i)
 		: "cc", "cx");
 }
@@ -63,32 +51,31 @@ static __always_inline void refcount_add(unsigned int i, refcount_t *r)
 static __always_inline void refcount_inc(refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "incl %0\n\t"
-		"REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
-		: [counter] "+m" (r->refs.counter)
+		REFCOUNT_CHECK_LT_ZERO
+		: [var] "+m" (r->refs.counter)
 		: : "cc", "cx");
 }
 
 static __always_inline void refcount_dec(refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "decl %0\n\t"
-		"REFCOUNT_CHECK_LE_ZERO counter=\"%[counter]\""
-		: [counter] "+m" (r->refs.counter)
+		REFCOUNT_CHECK_LE_ZERO
+		: [var] "+m" (r->refs.counter)
 		: : "cc", "cx");
 }
 
 static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
-
 	return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
-					 "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+					 REFCOUNT_CHECK_LT_ZERO,
 					 r->refs.counter, e, "er", i, "cx");
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
 	return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
-					"REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+					REFCOUNT_CHECK_LT_ZERO,
 					r->refs.counter, e, "cx");
 }
 
@@ -106,8 +93,8 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 
 		/* Did we try to increment from/to an undesirable state? */
 		if (unlikely(c < 0 || c == INT_MAX || result < c)) {
-			asm volatile("REFCOUNT_ERROR counter=\"%[counter]\""
-				     : : [counter] "m" (r->refs.counter)
+			asm volatile(REFCOUNT_ERROR
+				     : : [var] "m" (r->refs.counter)
 				     : "cc", "cx");
 			break;
 		}
@@ -122,6 +109,4 @@ static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
 	return refcount_add_not_zero(1, r);
 }
 
-#endif /* __ASSEMBLY__ */
-
 #endif
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 27937458c231..efa4a519f5e5 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -23,6 +23,7 @@
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/kernfs.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
@@ -310,9 +311,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 		return -EINVAL;
 	buf[nbytes - 1] = '\0';
 
+	cpus_read_lock();
 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 	if (!rdtgrp) {
 		rdtgroup_kn_unlock(of->kn);
+		cpus_read_unlock();
 		return -ENOENT;
 	}
 	rdt_last_cmd_clear();
@@ -367,6 +370,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 
 out:
 	rdtgroup_kn_unlock(of->kn);
+	cpus_read_unlock();
 	return ret ?: nbytes;
 }
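
The point of the new calls is lock ordering: the CPU hotplug read lock is
taken outside the rdtgroup mutex and dropped after it. A user-space
analogue of that ordering (an illustration only -- pthread locks standing
in for the kernel primitives):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t hotplug_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t group_mutex = PTHREAD_MUTEX_INITIALIZER;

static int demo_write(void)
{
	int ret;

	pthread_rwlock_rdlock(&hotplug_lock);	/* ~ cpus_read_lock()   */
	pthread_mutex_lock(&group_mutex);	/* ~ rdtgroup mutex     */
	ret = 0;				/* ... update schemata ... */
	pthread_mutex_unlock(&group_mutex);	/* release in reverse   */
	pthread_rwlock_unlock(&hotplug_lock);	/* ~ cpus_read_unlock() */
	return ret;
}

int main(void)
{
	printf("demo_write() -> %d\n", demo_write());
	return 0;
}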
 
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 2e173d47b450..4d36dcc1cf87 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -165,6 +165,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 	struct mtrr_gentry gentry;
 	void __user *arg = (void __user *) __arg;
 
+	memset(&gentry, 0, sizeof(gentry));
+
 	switch (cmd) {
 	case MTRRIOC_ADD_ENTRY:
 	case MTRRIOC_SET_ENTRY:
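
The memset() matters because the structure is later copied to user space
wholesale, and field-by-field assignment would leave any implicit padding
as uninitialized kernel stack. A small sketch of the failure mode, with a
hypothetical layout (not the real struct mtrr_gentry):

#include <stdio.h>
#include <string.h>

struct gentry_demo {
	unsigned int regnum;
	/* 4 bytes of implicit padding here on LP64 */
	unsigned long base;
	unsigned int size;
	unsigned int type;
};

int main(void)
{
	struct gentry_demo g;

	/* Without this memset the padding keeps whatever was on the stack,
	 * and copying &g out wholesale would leak those bytes. */
	memset(&g, 0, sizeof(g));
	g.regnum = 1;
	g.base = 0x100000;

	printf("sizeof=%zu, fields sum to %zu\n", sizeof(g),
	       sizeof(g.regnum) + sizeof(g.base) +
	       sizeof(g.size) + sizeof(g.type));
	return 0;
}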
diff --git a/arch/x86/kernel/macros.S b/arch/x86/kernel/macros.S
deleted file mode 100644
index 161c95059044..000000000000
--- a/arch/x86/kernel/macros.S
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * This file includes headers whose assembly part includes macros which are
- * commonly used. The macros are precompiled into an assembly file which is later
- * assembled together with each compiled file.
- */
-
-#include <linux/compiler.h>
-#include <asm/refcount.h>
-#include <asm/alternative-asm.h>
-#include <asm/bug.h>
-#include <asm/paravirt.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/jump_label.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bbfbf017065c..ddd4fa718c43 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -339,24 +339,6 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
 	return base;
 }
 
-void x86_fsbase_write_cpu(unsigned long fsbase)
-{
-	/*
-	 * Set the selector to 0 as a notion, that the segment base is
-	 * overwritten, which will be checked for skipping the segment load
-	 * during context switch.
-	 */
-	loadseg(FS, 0);
-	wrmsrl(MSR_FS_BASE, fsbase);
-}
-
-void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
-{
-	/* Set the selector to 0 for the same reason as %fs above. */
-	loadseg(GS, 0);
-	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
-}
-
 unsigned long x86_fsbase_read_task(struct task_struct *task)
 {
 	unsigned long fsbase;
@@ -385,38 +367,18 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
 	return gsbase;
 }
 
-int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
+void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
 {
-	/*
-	 * Not strictly needed for %fs, but do it for symmetry
-	 * with %gs
-	 */
-	if (unlikely(fsbase >= TASK_SIZE_MAX))
-		return -EPERM;
+	WARN_ON_ONCE(task == current);
 
-	preempt_disable();
 	task->thread.fsbase = fsbase;
-	if (task == current)
-		x86_fsbase_write_cpu(fsbase);
-	task->thread.fsindex = 0;
-	preempt_enable();
-
-	return 0;
 }
 
-int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
+void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
 {
-	if (unlikely(gsbase >= TASK_SIZE_MAX))
-		return -EPERM;
+	WARN_ON_ONCE(task == current);
 
-	preempt_disable();
 	task->thread.gsbase = gsbase;
-	if (task == current)
-		x86_gsbase_write_cpu_inactive(gsbase);
-	task->thread.gsindex = 0;
-	preempt_enable();
-
-	return 0;
 }
 
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
@@ -754,11 +716,60 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 
 	switch (option) {
 	case ARCH_SET_GS: {
-		ret = x86_gsbase_write_task(task, arg2);
+		if (unlikely(arg2 >= TASK_SIZE_MAX))
+			return -EPERM;
+
+		preempt_disable();
+		/*
+		 * ARCH_SET_GS has always overwritten the index
+		 * and the base. Zero is the most sensible value
+		 * to put in the index, and is the only value that
+		 * makes any sense if FSGSBASE is unavailable.
+		 */
+		if (task == current) {
+			loadseg(GS, 0);
+			x86_gsbase_write_cpu_inactive(arg2);
+
+			/*
+			 * On non-FSGSBASE systems, save_base_legacy() expects
+			 * that we also fill in thread.gsbase.
+			 */
+			task->thread.gsbase = arg2;
+
+		} else {
+			task->thread.gsindex = 0;
+			x86_gsbase_write_task(task, arg2);
+		}
+		preempt_enable();
 		break;
 	}
 	case ARCH_SET_FS: {
-		ret = x86_fsbase_write_task(task, arg2);
+		/*
+		 * Not strictly needed for %fs, but do it for symmetry
+		 * with %gs
+		 */
+		if (unlikely(arg2 >= TASK_SIZE_MAX))
+			return -EPERM;
+
+		preempt_disable();
+		/*
+		 * Set the selector to 0 for the same reason
+		 * as %gs above.
+		 */
+		if (task == current) {
+			loadseg(FS, 0);
+			x86_fsbase_write_cpu(arg2);
+
+			/*
+			 * On non-FSGSBASE systems, save_base_legacy() expects
+			 * that we also fill in thread.fsbase.
+			 */
+			task->thread.fsbase = arg2;
+		} else {
+			task->thread.fsindex = 0;
+			x86_fsbase_write_task(task, arg2);
+		}
+		preempt_enable();
 		break;
 	}
 	case ARCH_GET_FS: {
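
From user space this path is reached via arch_prctl(2). A minimal x86-64
sketch that reads the FS base back with ARCH_GET_FS (the read side of the
same interface):

#include <asm/prctl.h>		/* ARCH_GET_FS */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	unsigned long base = 0;

	/* ARCH_GET_FS stores the current FS base (set up by the threading
	 * library for TLS) into the pointed-to variable. */
	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &base)) {
		perror("arch_prctl");
		return 1;
	}
	printf("FS base: %#lx\n", base);
	return 0;
}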
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ffae9b9740fd..4b8ee05dd6ad 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -397,11 +397,12 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		/*
-		 * When changing the FS base, use the same
-		 * mechanism as for do_arch_prctl_64().
+		 * When changing the FS base, use do_arch_prctl_64()
+		 * to set the index to zero and to set the base
+		 * as requested.
 		 */
 		if (child->thread.fsbase != value)
-			return x86_fsbase_write_task(child, value);
+			return do_arch_prctl_64(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
 		/*
@@ -410,7 +411,7 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		if (child->thread.gsbase != value)
-			return x86_gsbase_write_task(child, value);
+			return do_arch_prctl_64(child, ARCH_SET_GS, value);
 		return 0;
 #endif
 	}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index fc37bbd23eb8..abcb8d00b014 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -55,10 +55,10 @@ struct addr_marker {
 enum address_markers_idx {
 	USER_SPACE_NR = 0,
 	KERNEL_SPACE_NR,
-	LOW_KERNEL_NR,
-#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
 	LDT_NR,
 #endif
+	LOW_KERNEL_NR,
 	VMALLOC_START_NR,
 	VMEMMAP_START_NR,
 #ifdef CONFIG_KASAN
@@ -66,9 +66,6 @@ enum address_markers_idx {
 	KASAN_SHADOW_END_NR,
 #endif
 	CPU_ENTRY_AREA_NR,
-#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
-	LDT_NR,
-#endif
 #ifdef CONFIG_X86_ESPFIX64
 	ESPFIX_START_NR,
 #endif
@@ -512,11 +509,11 @@ static inline bool is_hypervisor_range(int idx)
 {
 #ifdef CONFIG_X86_64
 	/*
-	 * ffff800000000000 - ffff87ffffffffff is reserved for
-	 * the hypervisor.
+	 * A hole in the beginning of kernel address space reserved
+	 * for a hypervisor.
 	 */
-	return	(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
-		(idx <  pgd_index(__PAGE_OFFSET));
+	return	(idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) &&
+		(idx <  pgd_index(GUARD_HOLE_END_ADDR));
 #else
 	return false;
 #endif
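
The named GUARD_HOLE_* constants replace the old "16 PGD entries below
__PAGE_OFFSET" arithmetic. A quick sanity-check sketch, assuming the
4-level layout where the hole spans ffff800000000000 - ffff87ffffffffff:

#include <stdio.h>

#define PGDIR_SHIFT	39			/* 4-level paging */
#define PTRS_PER_PGD	512
#define pgd_index(a)	(((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

int main(void)
{
	unsigned long base = 0xffff800000000000UL;	/* GUARD_HOLE_BASE_ADDR */
	unsigned long end  = 0xffff880000000000UL;	/* GUARD_HOLE_END_ADDR  */

	/* Prints 256 and 272: the hole covers 16 PGD entries, matching the
	 * old "pgd_index(__PAGE_OFFSET) - 16" computation. */
	printf("hole: pgd index %lu (incl.) .. %lu (excl.)\n",
	       pgd_index(base), pgd_index(end));
	return 0;
}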
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index db7a10082238..a1bcde35db4c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -285,20 +285,16 @@ static void cpa_flush_all(unsigned long cache)
 	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
 
-static bool __cpa_flush_range(unsigned long start, int numpages, int cache)
+static bool __inv_flush_all(int cache)
 {
 	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
-	WARN_ON(PAGE_ALIGN(start) != start);
-
 	if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		cpa_flush_all(cache);
 		return true;
 	}
 
-	flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
-
-	return !cache;
+	return false;
 }
 
 static void cpa_flush_range(unsigned long start, int numpages, int cache)
@@ -306,7 +302,14 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	unsigned int i, level;
 	unsigned long addr;
 
-	if (__cpa_flush_range(start, numpages, cache))
+	WARN_ON(PAGE_ALIGN(start) != start);
+
+	if (__inv_flush_all(cache))
+		return;
+
+	flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
+
+	if (!cache)
 		return;
 
 	/*
@@ -332,7 +335,12 @@ static void cpa_flush_array(unsigned long baddr, unsigned long *start,
 {
 	unsigned int i, level;
 
-	if (__cpa_flush_range(baddr, numpages, cache))
+	if (__inv_flush_all(cache))
+		return;
+
+	flush_tlb_all();
+
+	if (!cache)
 		return;
 
 	/*
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 08013524fba1..4fe956a63b25 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -519,8 +519,13 @@ static u64 sanitize_phys(u64 address)
 	 * for a "decoy" virtual address (bit 63 clear) passed to
 	 * set_memory_X(). __pa() on a "decoy" address results in a
 	 * physical address with bit 63 set.
+	 *
+	 * Decoy addresses are not present for 32-bit builds, see
+	 * set_mce_nospec().
 	 */
-	return address & __PHYSICAL_MASK;
+	if (IS_ENABLED(CONFIG_X86_64))
+		return address & __PHYSICAL_MASK;
+	return address;
 }
 
 /*
@@ -546,7 +551,11 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
 
 	start = sanitize_phys(start);
 	end = sanitize_phys(end);
-	BUG_ON(start >= end); /* end is exclusive */
+	if (start >= end) {
+		WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__,
+				start, end - 1, cattr_name(req_type));
+		return -EINVAL;
+	}
 
 	if (!pat_enabled()) {
 		/* This is identical to page table setting without PAT */
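
Two separate hardenings here: decoy-address masking becomes 64-bit only,
and a malformed range now warns and returns an error instead of BUG()ing
the machine. A user-space sketch of the masking half (the 52-bit physical
width is an assumption for illustration):

#include <inttypes.h>
#include <stdio.h>

#define PHYSICAL_MASK	((1ULL << 52) - 1)	/* assumed physical width */

/* A "decoy" address is a physical address with bit 63 set, produced by
 * __pa() on a decoy virtual address.  Masking recovers the real address;
 * 32-bit builds have no decoys and pass addresses through untouched. */
static uint64_t sanitize_phys_demo(uint64_t address)
{
	return address & PHYSICAL_MASK;
}

int main(void)
{
	uint64_t decoy = (1ULL << 63) | 0x1234000;

	printf("%#" PRIx64 " -> %#" PRIx64 "\n",
	       decoy, sanitize_phys_demo(decoy));
	return 0;
}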
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a5d7ed125337..0f4fe206dcc2 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -648,19 +648,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 			  unsigned long limit)
 {
 	int i, nr, flush = 0;
-	unsigned hole_low, hole_high;
+	unsigned hole_low = 0, hole_high = 0;
 
 	/* The limit is the last byte to be touched */
 	limit--;
 	BUG_ON(limit >= FIXADDR_TOP);
 
+#ifdef CONFIG_X86_64
 	/*
 	 * 64-bit has a great big hole in the middle of the address
-	 * space, which contains the Xen mappings.  On 32-bit these
-	 * will end up making a zero-sized hole and so is a no-op.
+	 * space, which contains the Xen mappings.
 	 */
-	hole_low = pgd_index(USER_LIMIT);
-	hole_high = pgd_index(PAGE_OFFSET);
+	hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
+	hole_high = pgd_index(GUARD_HOLE_END_ADDR);
+#endif
 
 	nr = pgd_index(limit) + 1;
 	for (i = 0; i < nr; i++) {
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index cdafa5edea49..20561a60db9c 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -17,8 +17,10 @@
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
 
-struct bug_entry {
+#ifdef CONFIG_BUG
+
 #ifdef CONFIG_GENERIC_BUG
+struct bug_entry {
 #ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
 	unsigned long	bug_addr;
 #else
@@ -33,10 +35,8 @@ struct bug_entry {
 	unsigned short	line;
 #endif
 	unsigned short	flags;
-#endif	/* CONFIG_GENERIC_BUG */
 };
-
-#ifdef CONFIG_BUG
+#endif	/* CONFIG_GENERIC_BUG */
 
 /*
  * Don't use BUG() or BUG_ON() unless there's really no way out; one
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 06396c1cf127..fc5004a4b07d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -99,13 +99,22 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  * unique, to convince GCC not to merge duplicate inline asm statements.
  */
 #define annotate_reachable() ({						\
-	asm volatile("ANNOTATE_REACHABLE counter=%c0"			\
-		     : : "i" (__COUNTER__));				\
+	asm volatile("%c0:\n\t"						\
+		     ".pushsection .discard.reachable\n\t"		\
+		     ".long %c0b - .\n\t"				\
+		     ".popsection\n\t" : : "i" (__COUNTER__));		\
 })
 #define annotate_unreachable() ({					\
-	asm volatile("ANNOTATE_UNREACHABLE counter=%c0"			\
-		     : : "i" (__COUNTER__));				\
+	asm volatile("%c0:\n\t"						\
+		     ".pushsection .discard.unreachable\n\t"		\
+		     ".long %c0b - .\n\t"				\
+		     ".popsection\n\t" : : "i" (__COUNTER__));		\
 })
+#define ASM_UNREACHABLE							\
+	"999:\n\t"							\
+	".pushsection .discard.unreachable\n\t"				\
+	".long 999b - .\n\t"						\
+	".popsection\n\t"
 #else
 #define annotate_reachable()
 #define annotate_unreachable()
@@ -293,45 +302,6 @@ static inline void *offset_to_ptr(const int *off)
 	return (void *)((unsigned long)off + *off);
 }
 
-#else /* __ASSEMBLY__ */
-
-#ifdef __KERNEL__
-#ifndef LINKER_SCRIPT
-
-#ifdef CONFIG_STACK_VALIDATION
-.macro ANNOTATE_UNREACHABLE counter:req
-\counter:
-	.pushsection .discard.unreachable
-	.long \counter\()b -.
-	.popsection
-.endm
-
-.macro ANNOTATE_REACHABLE counter:req
-\counter:
-	.pushsection .discard.reachable
-	.long \counter\()b -.
-	.popsection
-.endm
-
-.macro ASM_UNREACHABLE
-999:
-	.pushsection .discard.unreachable
-	.long 999b - .
-	.popsection
-.endm
-#else /* CONFIG_STACK_VALIDATION */
-.macro ANNOTATE_UNREACHABLE counter:req
-.endm
-
-.macro ANNOTATE_REACHABLE counter:req
-.endm
-
-.macro ASM_UNREACHABLE
-.endm
-#endif /* CONFIG_STACK_VALIDATION */
-
-#endif /* LINKER_SCRIPT */
-#endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
 /* Compile time object size, -1 for unknown */
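
The "%c0" operand prints the __COUNTER__ constant without the usual $
prefix, so every expansion defines a unique numeric label -- and that
uniqueness is what keeps GCC from merging the asm statements. A
compilable sketch of the trick, emitting into a dummy discard section:

#include <stdio.h>

/* Each use gets a fresh __COUNTER__ value, hence a distinct label and a
 * distinct section entry -- no two expansions can be folded into one. */
#define annotate_demo() ({						\
	asm volatile("%c0:\n\t"						\
		     ".pushsection .discard.demo\n\t"			\
		     ".long %c0b - .\n\t"				\
		     ".popsection\n\t" : : "i" (__COUNTER__));		\
})

int main(void)
{
	annotate_demo();
	annotate_demo();	/* second, distinct annotation */
	puts("annotated");
	return 0;
}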
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index bb015551c2d9..3d09844405c9 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -115,9 +115,7 @@ __cc-option = $(call try-run,\
 
 # Do not attempt to build with gcc plugins during cc-option tests.
 # (And this uses delayed resolution so the flags will be up to date.)
-# In addition, do not include the asm macros which are built later.
-CC_OPTION_FILTERED = $(GCC_PLUGINS_CFLAGS) $(ASM_MACRO_FLAGS)
-CC_OPTION_CFLAGS = $(filter-out $(CC_OPTION_FILTERED),$(KBUILD_CFLAGS))
+CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
 
 # cc-option
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index a5b4af47987a..42c5d50f2bcc 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -4,8 +4,6 @@ OBJECT_FILES_NON_STANDARD := y
 hostprogs-y	:= modpost mk_elfconfig
 always		:= $(hostprogs-y) empty.o
 
-CFLAGS_REMOVE_empty.o := $(ASM_MACRO_FLAGS)
-
 modpost-objs	:= modpost.o file2alias.o sumversion.o
 
 devicetable-offsets-file := devicetable-offsets.h


* [GIT PULL] x86 fixes
@ 2018-12-09 22:06 Ingo Molnar
  2018-12-09 23:45 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-12-09 22:06 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ac3e233d29f7f77f28243af0132057d378d3ea58 x86/vdso: Drop implicit common-page-size linker flag

Three fixes: a boot parameter re-(re-)fix, a retpoline build artifact fix 
and an LLVM workaround.

 Thanks,

	Ingo

------------------>
Juergen Gross (1):
      x86/boot: Clear RSDP address in boot_params for broken loaders

Masahiro Yamada (1):
      x86/build: Fix compiler support check for CONFIG_RETPOLINE

Nick Desaulniers (1):
      x86/vdso: Drop implicit common-page-size linker flag


 arch/x86/Makefile                      | 10 +++++++---
 arch/x86/entry/vdso/Makefile           |  4 ++--
 arch/x86/include/asm/bootparam_utils.h |  1 +
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index f5d7f4134524..75ef499a66e2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,9 +220,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-ifeq ($(RETPOLINE_CFLAGS),)
-  $(error You are building kernel with non-retpoline compiler, please update your compiler.)
-endif
   KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
 endif
 
@@ -307,6 +304,13 @@ ifndef CC_HAVE_ASM_GOTO
 	@echo Compiler lacks asm-goto support.
 	@exit 1
 endif
+ifdef CONFIG_RETPOLINE
+ifeq ($(RETPOLINE_CFLAGS),)
+	@echo "You are building kernel with non-retpoline compiler." >&2
+	@echo "Please update your compiler." >&2
+	@false
+endif
+endif
 
 archclean:
 	$(Q)rm -rf $(objtree)/arch/i386
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 141d415a8c80..0624bf2266fd 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -47,7 +47,7 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
 CPPFLAGS_vdso.lds += -P -C
 
 VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
-			-z max-page-size=4096 -z common-page-size=4096
+			-z max-page-size=4096
 
 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
@@ -98,7 +98,7 @@ CFLAGS_REMOVE_vvar.o = -pg
 
 CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \
-			   -z max-page-size=4096 -z common-page-size=4096
+			   -z max-page-size=4096
 
 # x32-rebranded versions
 vobjx32s-y := $(vobjs-y:.o=-x32.o)
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index a07ffd23e4dd..f6f6ef436599 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -36,6 +36,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
 	 */
 	if (boot_params->sentinel) {
 		/* fields in boot_params are left uninitialized, clear them */
+		boot_params->acpi_rsdp_addr = 0;
 		memset(&boot_params->ext_ramdisk_image, 0,
 		       (char *)&boot_params->efi_info -
 			(char *)&boot_params->ext_ramdisk_image);
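
The sentinel logic makes the fix easy to follow: the bzImage ships with
the sentinel byte set, and a conforming loader clears it while filling
boot_params. If it survives to kernel entry, every loader-owned field is
suspect and gets zeroed -- acpi_rsdp_addr now included. A cut-down sketch
(the struct layout is hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct boot_params_demo {
	uint8_t	 sentinel;
	uint64_t acpi_rsdp_addr;
	uint8_t	 ext_ramdisk_image[16];
};

static void sanitize_demo(struct boot_params_demo *bp)
{
	if (bp->sentinel) {	/* loader never touched boot_params */
		bp->acpi_rsdp_addr = 0;
		memset(bp->ext_ramdisk_image, 0,
		       sizeof(bp->ext_ramdisk_image));
	}
}

int main(void)
{
	struct boot_params_demo bp = {
		.sentinel = 0xff, .acpi_rsdp_addr = 0xdead,
	};

	sanitize_demo(&bp);
	printf("acpi_rsdp_addr after sanitize: %#lx\n",
	       (unsigned long)bp.acpi_rsdp_addr);
	return 0;
}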

* [GIT PULL] x86 fixes
@ 2018-11-30  6:29 Ingo Molnar
  2018-11-30 21:00 ` pr-tracker-bot
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-11-30  6:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 60c8144afc287ef09ce8c1230c6aa972659ba1bb x86/MCE/AMD: Fix the thresholding machinery initialization order

Misc fixes:

 - an MCE related boot crash fix on certain AMD systems
 - an FPU exception handling fix
 - an FPU handling race fix
 - a revert+rewrite of the RSDP boot protocol extension, using boot_params
   instead
 - a documentation fix

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/MCE/AMD: Fix the thresholding machinery initialization order

Elvira Khabirova (1):
      x86/ptrace: Fix documentation for tracehook_report_syscall_entry()

Jann Horn (1):
      x86/fpu: Use the correct exception table macro in the XSTATE_OP wrapper

Juergen Gross (2):
      x86/boot: Mostly revert commit ae7e1238e68f2a ("Add ACPI RSDP address to setup_header")
      x86/acpi, x86/boot: Take RSDP address from boot params if available

Sebastian Andrzej Siewior (1):
      x86/fpu: Disable bottom halves while loading FPU registers


 Documentation/x86/boot.txt            | 32 +-------------------------------
 arch/x86/boot/header.S                |  6 +-----
 arch/x86/include/asm/fpu/internal.h   |  2 +-
 arch/x86/include/asm/x86_init.h       |  2 --
 arch/x86/include/uapi/asm/bootparam.h |  7 ++-----
 arch/x86/kernel/acpi/boot.c           |  2 +-
 arch/x86/kernel/cpu/mcheck/mce_amd.c  | 19 ++++++-------------
 arch/x86/kernel/fpu/signal.c          |  4 ++--
 arch/x86/kernel/head32.c              |  1 -
 arch/x86/kernel/head64.c              |  2 --
 arch/x86/kernel/setup.c               | 17 -----------------
 include/linux/tracehook.h             |  4 ++--
 12 files changed, 16 insertions(+), 82 deletions(-)

diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 7727db8f94bc..5e9b826b5f62 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -61,18 +61,6 @@ Protocol 2.12:	(Kernel 3.8) Added the xloadflags field and extension fields
 	 	to struct boot_params for loading bzImage and ramdisk
 		above 4G in 64bit.
 
-Protocol 2.13:	(Kernel 3.14) Support 32- and 64-bit flags being set in
-		xloadflags to support booting a 64-bit kernel from 32-bit
-		EFI
-
-Protocol 2.14:	(Kernel 4.20) Added acpi_rsdp_addr holding the physical
-		address of the ACPI RSDP table.
-		The bootloader updates version with:
-		0x8000 | min(kernel-version, bootloader-version)
-		kernel-version being the protocol version supported by
-		the kernel and bootloader-version the protocol version
-		supported by the bootloader.
-
 **** MEMORY LAYOUT
 
 The traditional memory map for the kernel loader, used for Image or
@@ -209,7 +197,6 @@ Offset	Proto	Name		Meaning
 0258/8	2.10+	pref_address	Preferred loading address
 0260/4	2.10+	init_size	Linear memory required during initialization
 0264/4	2.11+	handover_offset	Offset of handover entry point
-0268/8	2.14+	acpi_rsdp_addr	Physical address of RSDP table
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -322,7 +309,7 @@ Protocol:	2.00+
   Contains the magic number "HdrS" (0x53726448).
 
 Field name:	version
-Type:		modify
+Type:		read
 Offset/size:	0x206/2
 Protocol:	2.00+
 
@@ -330,12 +317,6 @@ Protocol:	2.00+
   e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
   10.17.
 
-  Up to protocol version 2.13 this information is only read by the
-  bootloader. From protocol version 2.14 onwards the bootloader will
-  write the used protocol version or-ed with 0x8000 to the field. The
-  used protocol version will be the minimum of the supported protocol
-  versions of the bootloader and the kernel.
-
 Field name:	realmode_swtch
 Type:		modify (optional)
 Offset/size:	0x208/4
@@ -763,17 +744,6 @@ Offset/size:	0x264/4
 
   See EFI HANDOVER PROTOCOL below for more details.
 
-Field name:	acpi_rsdp_addr
-Type:		write
-Offset/size:	0x268/8
-Protocol:	2.14+
-
-  This field can be set by the boot loader to tell the kernel the
-  physical address of the ACPI RSDP table.
-
-  A value of 0 indicates the kernel should fall back to the standard
-  methods to locate the RSDP.
-
 
 **** THE IMAGE CHECKSUM
 
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 4c881c850125..850b8762e889 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -300,7 +300,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x020e		# header version number (>= 0x0105)
+		.word	0x020d		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -558,10 +558,6 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 init_size:		.long INIT_SIZE		# kernel initialization size
 handover_offset:	.long 0			# Filled in by build.c
 
-acpi_rsdp_addr:		.quad 0			# 64-bit physical pointer to the
-						# ACPI RSDP table, added with
-						# version 2.14
-
 # End of setup header #####################################################
 
 	.section ".entrytext", "ax"
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 5f7290e6e954..69dcdf195b61 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
 		     "3: movl $-2,%[err]\n\t"				\
 		     "jmp 2b\n\t"					\
 		     ".popsection\n\t"					\
-		     _ASM_EXTABLE_UA(1b, 3b)				\
+		     _ASM_EXTABLE(1b, 3b)				\
 		     : [err] "=r" (err)					\
 		     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)	\
 		     : "memory")
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 0f842104862c..b85a7c54c6a1 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -303,6 +303,4 @@ extern void x86_init_noop(void);
 extern void x86_init_uint_noop(unsigned int unused);
 extern bool x86_pnpbios_disabled(void);
 
-void x86_verify_bootdata_version(void);
-
 #endif
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 22f89d040ddd..60733f137e9a 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -16,9 +16,6 @@
 #define RAMDISK_PROMPT_FLAG		0x8000
 #define RAMDISK_LOAD_FLAG		0x4000
 
-/* version flags */
-#define VERSION_WRITTEN	0x8000
-
 /* loadflags */
 #define LOADED_HIGH	(1<<0)
 #define KASLR_FLAG	(1<<1)
@@ -89,7 +86,6 @@ struct setup_header {
 	__u64	pref_address;
 	__u32	init_size;
 	__u32	handover_offset;
-	__u64	acpi_rsdp_addr;
 } __attribute__((packed));
 
 struct sys_desc_table {
@@ -159,7 +155,8 @@ struct boot_params {
 	__u8  _pad2[4];					/* 0x054 */
 	__u64  tboot_addr;				/* 0x058 */
 	struct ist_info ist_info;			/* 0x060 */
-	__u8  _pad3[16];				/* 0x070 */
+	__u64 acpi_rsdp_addr;				/* 0x070 */
+	__u8  _pad3[8];					/* 0x078 */
 	__u8  hd0_info[16];	/* obsolete! */		/* 0x080 */
 	__u8  hd1_info[16];	/* obsolete! */		/* 0x090 */
 	struct sys_desc_table sys_desc_table; /* obsolete! */	/* 0x0a0 */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 92c76bf97ad8..06635fbca81c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
 
 u64 x86_default_get_root_pointer(void)
 {
-	return boot_params.hdr.acpi_rsdp_addr;
+	return boot_params.acpi_rsdp_addr;
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index dd33c357548f..e12454e21b8a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -56,7 +56,7 @@
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF	0xF000
 
-static bool thresholding_en;
+static bool thresholding_irq_en;
 
 static const char * const th_names[] = {
 	"load_store",
@@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 
 set_offset:
 	offset = setup_APIC_mce_threshold(offset, new);
-
-	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
-		mce_threshold_vector = amd_threshold_interrupt;
+	if (offset == new)
+		thresholding_irq_en = true;
 
 done:
 	mce_threshold_block_init(&b, offset);
@@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu)
 {
 	unsigned int bank;
 
-	if (!thresholding_en)
-		return 0;
-
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
@@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu)
 	struct threshold_bank **bp;
 	int err = 0;
 
-	if (!thresholding_en)
-		return 0;
-
 	bp = per_cpu(threshold_banks, cpu);
 	if (bp)
 		return 0;
@@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void)
 {
 	unsigned lcpu = 0;
 
-	if (mce_threshold_vector == amd_threshold_interrupt)
-		thresholding_en = true;
-
 	/* to hit CPUs online before the notifier is up */
 	for_each_online_cpu(lcpu) {
 		int err = mce_threshold_create_device(lcpu);
@@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void)
 			return err;
 	}
 
+	if (thresholding_irq_en)
+		mce_threshold_vector = amd_threshold_interrupt;
+
 	return 0;
 }
 /*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 61a949d84dfa..d99a8ee9e185 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
 		}
 
+		local_bh_disable();
 		fpu->initialized = 1;
-		preempt_disable();
 		fpu__restore(fpu);
-		preempt_enable();
+		local_bh_enable();
 
 		return err;
 	} else {
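
The subtlety: preempt_disable() only stops task preemption, while a
softirq doing kernel_fpu_begin() could still run between marking the FPU
state initialized and loading the registers, clobbering the state being
restored. local_bh_disable() keeps softirqs away and implies no
preemption. A toy sketch of the ordering (the functions are stubs,
purely illustrative):

#include <stdio.h>

static void local_bh_disable(void)   { puts("BHs off, preemption off"); }
static void local_bh_enable(void)    { puts("BHs back on"); }
static void load_fpu_registers(void) { puts("loading FPU registers"); }

int main(void)
{
	/* The window between "state marked live" and "registers loaded"
	 * must be closed against softirqs, not just against preemption. */
	local_bh_disable();
	load_fpu_registers();
	local_bh_enable();
	return 0;
}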
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 76fa3b836598..ec6fefbfd3c0 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void)
 	cr4_init_shadow();
 
 	sanitize_boot_params(&boot_params);
-	x86_verify_bootdata_version();
 
 	x86_early_init_platform_quirks();
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7663a8eb602b..16b1cbd3a61e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
 	if (!boot_params.hdr.version)
 		copy_bootdata(__va(real_mode_data));
 
-	x86_verify_bootdata_version();
-
 	x86_early_init_platform_quirks();
 
 	switch (boot_params.hdr.hardware_subarch) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b74e7bfed6ab..d494b9bfe618 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1280,23 +1280,6 @@ void __init setup_arch(char **cmdline_p)
 	unwind_init();
 }
 
-/*
- * From boot protocol 2.14 onwards we expect the bootloader to set the
- * version to "0x8000 | <used version>". In case we find a version >= 2.14
- * without the 0x8000 we assume the boot loader supports 2.13 only and
- * reset the version accordingly. The 0x8000 flag is removed in any case.
- */
-void __init x86_verify_bootdata_version(void)
-{
-	if (boot_params.hdr.version & VERSION_WRITTEN)
-		boot_params.hdr.version &= ~VERSION_WRITTEN;
-	else if (boot_params.hdr.version >= 0x020e)
-		boot_params.hdr.version = 0x020d;
-
-	if (boot_params.hdr.version < 0x020e)
-		boot_params.hdr.acpi_rsdp_addr = 0;
-}
-
 #ifdef CONFIG_X86_32
 
 static struct resource video_ram_resource = {
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 40b0b4c1bf7b..df20f8bdbfa3 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -83,8 +83,8 @@ static inline int ptrace_report_syscall(struct pt_regs *regs)
  * tracehook_report_syscall_entry - task is about to attempt a system call
  * @regs:		user register state of current task
  *
- * This will be called if %TIF_SYSCALL_TRACE has been set, when the
- * current task has just entered the kernel for a system call.
+ * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set,
+ * when the current task has just entered the kernel for a system call.
  * Full user register state is available here.  Changing the values
  * in @regs can affect the system call number and arguments to be tried.
  * It is safe to block here, preventing the system call from beginning.

* [GIT PULL] x86 fixes
@ 2018-11-03 23:09 Ingo Molnar
  2018-11-04  1:27 ` Linus Torvalds
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-11-03 23:09 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 23a12ddee1ce28065b71f14ccc695b5a0c8a64ff Merge branch 'core/urgent' into x86/urgent, to pick up objtool fix

A number of fixes and some late updates:

  - make in_compat_syscall() behavior on x86-32 similar to other
    platforms; this touches a number of generic files but is not intended
    to impact non-x86 platforms.

  - objtool fixes

  - PAT preemption fix

  - paravirt fixes/cleanups

  - cpufeatures updates for new instructions

  - earlyprintk quirk

  - make microcode version in sysfs world-readable (it is already world-readable in procfs) 

  - minor cleanups and fixes

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/firmware/efi/efivars.c     # 98f76206b335: compat: Cleanup in_compat_sy
  include/linux/compat.h             # a846446b1914: x86/compat: Adjust in_compat
  kernel/time/time.c                 # 98f76206b335: compat: Cleanup in_compat_sy
  net/xfrm/xfrm_state.c              # 98f76206b335: compat: Cleanup in_compat_sy
  net/xfrm/xfrm_user.c               # 98f76206b335: compat: Cleanup in_compat_sy
  tools/objtool/check.c              # 4a60aa05a063: objtool: Support per-functio
  tools/objtool/check.h              # 4a60aa05a063: objtool: Support per-functio
  tools/objtool/elf.c                # bcb6fb5da77c: objtool: Support GCC 9 cold 
                                   # 4a60aa05a063: objtool: Support per-functio
  tools/objtool/elf.h                # 4a60aa05a063: objtool: Support per-functio

 Thanks,

	Ingo

------------------>
Allan Xavier (1):
      objtool: Support per-function rodata sections

Dave Jiang (1):
      x86/numa_emulation: Fix uniform-split numa emulation

Dmitry Safonov (2):
      x86/compat: Adjust in_compat_syscall() to generic code under !COMPAT
      compat: Cleanup in_compat_syscall() callers

Feng Tang (1):
      x86/earlyprintk: Add a force option for pciserial device

Fenghua Yu (2):
      x86/cpufeatures: Enumerate MOVDIRI instruction
      x86/cpufeatures: Enumerate MOVDIR64B instruction

Jacek Tomaka (1):
      x86/microcode: Make revision and processor flags world-readable

Jordan Borgner (1):
      x86: Clean up 'sizeof x' => 'sizeof(x)'

Josh Poimboeuf (1):
      objtool: Support GCC 9 cold subfunction naming scheme

Juergen Gross (2):
      x86/paravirt: Remove GPL from pv_ops export
      x86/paravirt: Remove unused _paravirt_ident_32

Rasmus Villemoes (1):
      x86/traps: Use format string with panic() call

Sebastian Andrzej Siewior (1):
      x86/mm/pat: Disable preemption around __flush_tlb_all()


 Documentation/admin-guide/kernel-parameters.txt |  6 +++-
 arch/x86/boot/cpucheck.c                        |  2 +-
 arch/x86/boot/early_serial_console.c            |  4 +--
 arch/x86/boot/edd.c                             |  6 ++--
 arch/x86/boot/main.c                            |  4 +--
 arch/x86/boot/memory.c                          |  2 +-
 arch/x86/boot/regs.c                            |  2 +-
 arch/x86/boot/video-vesa.c                      |  6 ++--
 arch/x86/boot/video.c                           |  2 +-
 arch/x86/events/intel/core.c                    |  2 +-
 arch/x86/include/asm/compat.h                   |  9 +++++-
 arch/x86/include/asm/cpufeatures.h              |  2 ++
 arch/x86/include/asm/ftrace.h                   |  4 +--
 arch/x86/include/asm/paravirt_types.h           |  2 --
 arch/x86/include/asm/tlbflush.h                 |  6 ++++
 arch/x86/kernel/cpu/common.c                    |  4 +--
 arch/x86/kernel/cpu/mcheck/mce.c                |  2 +-
 arch/x86/kernel/cpu/microcode/core.c            |  4 +--
 arch/x86/kernel/cpu/mtrr/generic.c              |  2 +-
 arch/x86/kernel/cpu/mtrr/if.c                   |  6 ++--
 arch/x86/kernel/early_printk.c                  | 29 +++++++++++------
 arch/x86/kernel/head64.c                        |  2 +-
 arch/x86/kernel/msr.c                           |  8 ++---
 arch/x86/kernel/paravirt.c                      | 28 +++++------------
 arch/x86/kernel/paravirt_patch_32.c             | 18 ++++-------
 arch/x86/kernel/paravirt_patch_64.c             | 20 ++++--------
 arch/x86/kernel/process_64.c                    |  4 +--
 arch/x86/kernel/sys_x86_64.c                    | 11 ++++---
 arch/x86/kernel/traps.c                         |  2 +-
 arch/x86/kvm/emulate.c                          | 22 ++++++-------
 arch/x86/kvm/lapic.c                            |  2 +-
 arch/x86/kvm/x86.c                              | 42 ++++++++++++-------------
 arch/x86/mm/hugetlbpage.c                       |  4 +--
 arch/x86/mm/mmap.c                              |  2 +-
 arch/x86/mm/numa_emulation.c                    | 12 +++++--
 arch/x86/mm/pageattr.c                          |  6 +++-
 arch/x86/tools/relocs.c                         |  4 +--
 arch/x86/um/asm/elf.h                           |  2 +-
 drivers/firmware/efi/efivars.c                  | 16 +++-------
 include/linux/compat.h                          |  4 +--
 kernel/time/time.c                              |  2 +-
 net/xfrm/xfrm_state.c                           |  2 --
 net/xfrm/xfrm_user.c                            |  2 --
 tools/objtool/check.c                           | 38 ++++++++++++++++++----
 tools/objtool/check.h                           |  4 +--
 tools/objtool/elf.c                             |  3 +-
 tools/objtool/elf.h                             |  3 +-
 47 files changed, 198 insertions(+), 171 deletions(-)


* [GIT PULL] x86 fixes
@ 2018-10-20  8:54 Ingo Molnar
  2018-10-20 13:28 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-10-20  8:54 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Greg,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 485734f3fc77c1eb77ffe138c027b9a4bf0178f3 x86/swiotlb: Enable swiotlb for > 4GiG RAM on 32-bit kernels

It's 4 misc fixes, 3 build warning fixes and 3 comment fixes.

In hindsight I'd have left out the 3 comment fixes to make the pull 
request look less scary at such a late point in the cycle. :-/

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/entry/64: Further improve paranoid_entry comments

Christoph Hellwig (1):
      x86/swiotlb: Enable swiotlb for > 4GiG RAM on 32-bit kernels

Dave Hansen (1):
      x86/entry: Add some paranoid entry/exit CR3 handling comments

Jan Kiszka (1):
      x86/entry/32: Clear the CS high bits

Nathan Chancellor (2):
      x86/time: Correct the attribute on jiffies' definition
      x86/boot: Add -Wno-pointer-sign to KBUILD_CFLAGS

Peter Zijlstra (2):
      x86/tsc: Force inlining of cyc2ns bits
      x86/percpu: Fix this_cpu_read()

Sebastian Andrzej Siewior (2):
      x86/fpu: Remove second definition of fpu in __fpu__restore_sig()
      x86/fpu: Fix i486 + no387 boot crash by only saving FPU registers on context switch if there is an FPU


 arch/x86/boot/compressed/Makefile   |  1 +
 arch/x86/entry/entry_32.S           | 13 +++++++------
 arch/x86/entry/entry_64.S           | 13 +++++++++++++
 arch/x86/include/asm/fpu/internal.h |  2 +-
 arch/x86/include/asm/percpu.h       |  8 ++++----
 arch/x86/kernel/fpu/signal.c        |  1 -
 arch/x86/kernel/pci-swiotlb.c       |  2 --
 arch/x86/kernel/time.c              |  2 +-
 arch/x86/kernel/tsc.c               |  6 +++---
 9 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 28764dacf018..466f66c8a7f8 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
 KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += -Wno-pointer-sign
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..fbbf1ba57ec6 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -389,6 +389,13 @@
 	 * that register for the time this macro runs
 	 */
 
+	/*
+	 * The high bits of the CS dword (__csh) are used for
+	 * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
+	 * hardware didn't do this for us.
+	 */
+	andl	$(0x0000ffff), PT_CS(%esp)
+
 	/* Are we on the entry stack? Bail out if not! */
 	movl	PER_CPU_VAR(cpu_entry_area), %ecx
 	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -407,12 +414,6 @@
 	/* Load top of task-stack into %edi */
 	movl	TSS_entry2task_stack(%edi), %edi
 
-	/*
-	 * Clear unused upper bits of the dword containing the word-sized CS
-	 * slot in pt_regs in case hardware didn't clear it for us.
-	 */
-	andl	$(0x0000ffff), PT_CS(%esp)
-
 	/* Special case - entry from kernel mode via entry stack */
 #ifdef CONFIG_VM86
 	movl	PT_EFLAGS(%esp), %ecx		# mix EFLAGS and CS
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..f95dcb209fdf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry)
 	xorl	%ebx, %ebx
 
 1:
+	/*
+	 * Always stash CR3 in %r14.  This value will be restored,
+	 * verbatim, at exit.  Needed if paranoid_entry interrupted
+	 * another entry that already switched to the user CR3 value
+	 * but has not yet returned to userspace.
+	 *
+	 * This is also why CS (stashed in the "iret frame" by the
+	 * hardware at entry) can not be used: this may be a return
+	 * to kernel code, but with a user CR3 value.
+	 */
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
 	ret
@@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
 	TRACE_IRQS_IRETQ_DEBUG
+	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:
 	jmp restore_regs_and_return_to_kernel
@@ -1626,6 +1638,7 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi
 
+	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
 
 	testl	%ebx, %ebx			/* swapgs needed? */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index a38bf5a1e37a..69dcdf195b61 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (old_fpu->initialized) {
+	if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e9202a0de8f0..1a19d11cfbbd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -185,22 +185,22 @@ do {									\
 	typeof(var) pfo_ret__;				\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b "__percpu_arg(1)",%0"		\
+		asm volatile(op "b "__percpu_arg(1)",%0"\
 		    : "=q" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 2:						\
-		asm(op "w "__percpu_arg(1)",%0"		\
+		asm volatile(op "w "__percpu_arg(1)",%0"\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 4:						\
-		asm(op "l "__percpu_arg(1)",%0"		\
+		asm volatile(op "l "__percpu_arg(1)",%0"\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 8:						\
-		asm(op "q "__percpu_arg(1)",%0"		\
+		asm volatile(op "q "__percpu_arg(1)",%0"\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
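
In the kernel case the per-CPU variable is addressed via a segment prefix
and can change across a context switch the compiler cannot see, so two
syntactically identical asm statements must not be merged -- which is
exactly what "volatile" prevents. A small sketch of the annotation
(x86-64, illustration only):

#include <stdio.h>

static inline unsigned long read_var(unsigned long *p)
{
	unsigned long v;

	/* "volatile" forces the load to be emitted at every call site,
	 * even when the compiler believes nothing changed in between. */
	asm volatile("movq %1, %0" : "=r" (v) : "m" (*p));
	return v;
}

int main(void)
{
	unsigned long x = 42;

	printf("%lu\n", read_var(&x));
	x = 43;
	printf("%lu\n", read_var(&x));	/* re-read, prints 43 */
	return 0;
}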
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 23f1691670b6..61a949d84dfa 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 * thread's fpu state, reconstruct fxstate from the fsave
 		 * header. Validate and sanitize the copied state.
 		 */
-		struct fpu *fpu = &tsk->thread.fpu;
 		struct user_i387_ia32_struct env;
 		int err = 0;
 
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 661583662430..71c0b01d93b1 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
 int __init pci_swiotlb_detect_4gb(void)
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
-#ifdef CONFIG_X86_64
 	if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
 		swiotlb = 1;
-#endif
 
 	/*
 	 * If SME is active then swiotlb will be set to 1 so that bounce
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be01328eb755..fddaefc51fb6 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -25,7 +25,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b52bd2b6cdb4..6d5dc5dabfd7 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -58,7 +58,7 @@ struct cyc2ns {
 
 static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
 
-void cyc2ns_read_begin(struct cyc2ns_data *data)
+void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
 {
 	int seq, idx;
 
@@ -75,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data)
 	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
 }
 
-void cyc2ns_read_end(void)
+void __always_inline cyc2ns_read_end(void)
 {
 	preempt_enable_notrace();
 }
@@ -104,7 +104,7 @@ void cyc2ns_read_end(void)
  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
 
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
 	struct cyc2ns_data data;
 	unsigned long long ns;

* [GIT PULL] x86 fixes
@ 2018-10-11  9:14 Ingo Molnar
  2018-10-11 12:32 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-10-11  9:14 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Greg,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 184d47f0fd365108bd06ab26cdb3450b716269fd x86/mm: Avoid VLA in pgd_alloc()

An intel_rdt memory access fix and a VLA fix in pgd_alloc().

 Thanks,

	Ingo

------------------>
Kees Cook (1):
      x86/mm: Avoid VLA in pgd_alloc()

Reinette Chatre (1):
      x86/intel_rdt: Fix out-of-bounds memory access in CBM tests


 arch/x86/kernel/cpu/intel_rdt.h             |  6 ++---
 arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | 20 ++++++++--------
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c    | 36 ++++++++++++++++++-----------
 arch/x86/mm/pgtable.c                       | 10 ++++++--
 4 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 285eb3ec4200..3736f6dc9545 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -529,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 int rdtgroup_schemata_show(struct kernfs_open_file *of,
 			   struct seq_file *s, void *v);
 bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
-			   u32 _cbm, int closid, bool exclusive);
+			   unsigned long cbm, int closid, bool exclusive);
 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
-				  u32 cbm);
+				  unsigned long cbm);
 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
 int rdtgroup_tasks_assigned(struct rdtgroup *r);
 int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
 int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
 bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
 int rdt_pseudo_lock_init(void);
 void rdt_pseudo_lock_release(void);
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index 40f3903ae5d9..f8c260d522ca 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -797,25 +797,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
 /**
  * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
  * @d: RDT domain
- * @_cbm: CBM to test
+ * @cbm: CBM to test
  *
- * @d represents a cache instance and @_cbm a capacity bitmask that is
- * considered for it. Determine if @_cbm overlaps with any existing
+ * @d represents a cache instance and @cbm a capacity bitmask that is
+ * considered for it. Determine if @cbm overlaps with any existing
  * pseudo-locked region on @d.
  *
- * Return: true if @_cbm overlaps with pseudo-locked region on @d, false
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: true if @cbm overlaps with pseudo-locked region on @d, false
  * otherwise.
  */
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
 {
-	unsigned long *cbm = (unsigned long *)&_cbm;
-	unsigned long *cbm_b;
 	unsigned int cbm_len;
+	unsigned long cbm_b;
 
 	if (d->plr) {
 		cbm_len = d->plr->r->cache.cbm_len;
-		cbm_b = (unsigned long *)&d->plr->cbm;
-		if (bitmap_intersects(cbm, cbm_b, cbm_len))
+		cbm_b = d->plr->cbm;
+		if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
 			return true;
 	}
 	return false;
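
The bug class, in miniature: the bitmap helpers walk unsigned long words,
so passing them a casted pointer to a u32 makes them read 8 bytes on
x86-64 -- four of them past the object. Widening the value into a real
unsigned long first keeps every access in bounds:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for bitmap_intersects()/test_bit(): it indexes
 * whole unsigned long words, just like the real helpers. */
static int test_bit_demo(const unsigned long *addr, unsigned int nr)
{
	return (addr[nr / (8 * sizeof(long))] >>
		(nr % (8 * sizeof(long)))) & 1;
}

int main(void)
{
	uint32_t cbm32 = 0xf0;
	unsigned long cbm = cbm32;	/* safe: widen by value */

	/* test_bit_demo((unsigned long *)&cbm32, 4) would read past cbm32 */
	printf("bit 4 set: %d\n", test_bit_demo(&cbm, 4));
	return 0;
}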
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 1b8e86a5d5e1..b140c68bc14b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -975,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
  * is false then overlaps with any resource group or hardware entities
  * will be considered.
  *
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
  * Return: false if CBM does not overlap, true if it does.
  */
 bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
-			   u32 _cbm, int closid, bool exclusive)
+			   unsigned long cbm, int closid, bool exclusive)
 {
-	unsigned long *cbm = (unsigned long *)&_cbm;
-	unsigned long *ctrl_b;
 	enum rdtgrp_mode mode;
+	unsigned long ctrl_b;
 	u32 *ctrl;
 	int i;
 
 	/* Check for any overlap with regions used by hardware directly */
 	if (!exclusive) {
-		if (bitmap_intersects(cbm,
-				      (unsigned long *)&r->cache.shareable_bits,
-				      r->cache.cbm_len))
+		ctrl_b = r->cache.shareable_bits;
+		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
 			return true;
 	}
 
 	/* Check for overlap with other resource groups */
 	ctrl = d->ctrl_val;
 	for (i = 0; i < closids_supported(); i++, ctrl++) {
-		ctrl_b = (unsigned long *)ctrl;
+		ctrl_b = *ctrl;
 		mode = rdtgroup_mode_by_closid(i);
 		if (closid_allocated(i) && i != closid &&
 		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
-			if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
+			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
 				if (exclusive) {
 					if (mode == RDT_MODE_EXCLUSIVE)
 						return true;
@@ -1138,15 +1139,18 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
  * computed by first dividing the total cache size by the CBM length to
  * determine how many bytes each bit in the bitmask represents. The result
  * is multiplied with the number of bits set in the bitmask.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used to make the
+ * bitmap functions work correctly.
  */
 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
-				  struct rdt_domain *d, u32 cbm)
+				  struct rdt_domain *d, unsigned long cbm)
 {
 	struct cpu_cacheinfo *ci;
 	unsigned int size = 0;
 	int num_b, i;
 
-	num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
+	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
 	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
 	for (i = 0; i < ci->num_leaves; i++) {
 		if (ci->info_list[i].level == r->cache_level) {
@@ -2353,6 +2357,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 	u32 used_b = 0, unused_b = 0;
 	u32 closid = rdtgrp->closid;
 	struct rdt_resource *r;
+	unsigned long tmp_cbm;
 	enum rdtgrp_mode mode;
 	struct rdt_domain *d;
 	int i, ret;
@@ -2390,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 			 * modify the CBM based on system availability.
 			 */
 			cbm_ensure_valid(&d->new_ctrl, r);
-			if (bitmap_weight((unsigned long *) &d->new_ctrl,
-					  r->cache.cbm_len) <
-					r->cache.min_cbm_bits) {
+			/*
+			 * Assign the u32 CBM to an unsigned long to ensure
+			 * that bitmap_weight() does not access out-of-bound
+			 * memory.
+			 */
+			tmp_cbm = d->new_ctrl;
+			if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
+			    r->cache.min_cbm_bits) {
 				rdt_last_cmd_printf("no space on %s:%d\n",
 						    r->name, d->id);
 				return -ENOSPC;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..59274e2c1ac4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd)
 
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+#define MAX_UNSHARED_PTRS_PER_PGD			\
+	max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
 
 
 static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
@@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd)
  * and initialize the kernel pmds here.
  */
 #define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
+#define MAX_PREALLOCATED_PMDS	MAX_UNSHARED_PTRS_PER_PGD
 
 /*
  * We allocate separate PMDs for the kernel part of the user page-table
@@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd)
  */
 #define PREALLOCATED_USER_PMDS	 (static_cpu_has(X86_FEATURE_PTI) ? \
 					KERNEL_PGD_PTRS : 0)
+#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
 
 void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 {
@@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 
 /* No need to prepopulate any pagetable entries in non-PAE modes. */
 #define PREALLOCATED_PMDS	0
+#define MAX_PREALLOCATED_PMDS	0
 #define PREALLOCATED_USER_PMDS	 0
+#define MAX_PREALLOCATED_USER_PMDS 0
 #endif	/* CONFIG_X86_PAE */
 
 static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
@@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd)
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
-	pmd_t *u_pmds[PREALLOCATED_USER_PMDS];
-	pmd_t *pmds[PREALLOCATED_PMDS];
+	pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
+	pmd_t *pmds[MAX_PREALLOCATED_PMDS];
 
 	pgd = _pgd_alloc();
 

* [GIT PULL] x86 fixes
@ 2018-10-05  9:53 Ingo Molnar
  2018-10-05 23:06 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-10-05  9:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton, Andy Lutomirski

Greg,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 02e425668f5c9deb42787d10001a3b605993ad15 x86/vdso: Fix vDSO syscall fallback asm constraint regression

Misc fixes:

 - fix various vDSO bugs: asm constraints and retpolines
 - add vDSO unit tests to make sure these bugs never re-appear
 - fix UV platform TSC initialization bug
 - fix build warning on Clang

 Thanks,

	Ingo

------------------>
Andy Lutomirski (4):
      x86/vdso: Fix asm constraints on vDSO syscall fallbacks
      selftests/x86: Add clock_gettime() tests to test_vdso
      x86/vdso: Only enable vDSO retpolines when enabled and supported
      x86/vdso: Fix vDSO syscall fallback asm constraint regression

Mike Travis (2):
      x86/platform/uv: Provide is_early_uv_system()
      x86/tsc: Fix UV TSC initialization

Nathan Chancellor (1):
      x86/cpu/amd: Remove unnecessary parentheses


 arch/x86/entry/vdso/Makefile            |  16 ++-
 arch/x86/entry/vdso/vclock_gettime.c    |  26 ++---
 arch/x86/include/asm/uv/uv.h            |   6 ++
 arch/x86/kernel/cpu/amd.c               |   2 +-
 arch/x86/kernel/tsc.c                   |   4 +
 tools/testing/selftests/x86/test_vdso.c | 172 ++++++++++++++++++++++++++++++++
 6 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index fa3f439f0a92..141d415a8c80 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
 CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
        $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
        -fno-omit-frame-pointer -foptimize-sibling-calls \
-       -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS)
+       -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+  CFL += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
 
 $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
 
@@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
 KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
 KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+  KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
+
 $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
 
 $(obj)/vdso32.so.dbg: FORCE \
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d95c60..e48ca3afa091 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -43,8 +43,9 @@ extern u8 hvclock_page
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
-	asm("syscall" : "=a" (ret) :
-	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+	asm ("syscall" : "=a" (ret), "=m" (*ts) :
+	     "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
+	     "memory", "rcx", "r11");
 	return ret;
 }
 
@@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 
-	asm("syscall" : "=a" (ret) :
-	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+	asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
+	     "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
+	     "memory", "rcx", "r11");
 	return ret;
 }
 
@@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
 
-	asm(
+	asm (
 		"mov %%ebx, %%edx \n"
-		"mov %2, %%ebx \n"
+		"mov %[clock], %%ebx \n"
 		"call __kernel_vsyscall \n"
 		"mov %%edx, %%ebx \n"
-		: "=a" (ret)
-		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+		: "=a" (ret), "=m" (*ts)
+		: "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
 		: "memory", "edx");
 	return ret;
 }
@@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 
-	asm(
+	asm (
 		"mov %%ebx, %%edx \n"
-		"mov %2, %%ebx \n"
+		"mov %[tv], %%ebx \n"
 		"call __kernel_vsyscall \n"
 		"mov %%edx, %%ebx \n"
-		: "=a" (ret)
-		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+		: "=a" (ret), "=m" (*tv), "=m" (*tz)
+		: "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
 		: "memory", "edx");
 	return ret;
 }
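
The constraint changes encode two facts about the x86-64 syscall ABI that
the old asm left implicit: the syscall instruction clobbers %rcx and %r11
(the kernel returns RIP/RFLAGS in them), and the call stores through the
ts/tv/tz pointers, so the compiler must order those writes correctly.  A
standalone userspace sketch of the corrected idiom (for illustration only):

	#include <time.h>
	#include <sys/syscall.h>

	static long clock_gettime_fallback(long clock, struct timespec *ts)
	{
		long ret;

		/* "=m" (*ts) makes the kernel's store visible to the compiler;
		 * rcx/r11 are clobbered by the syscall instruction itself.
		 */
		asm ("syscall" : "=a" (ret), "=m" (*ts) :
		     "0" (SYS_clock_gettime), "D" (clock), "S" (ts) :
		     "memory", "rcx", "r11");
		return ret;
	}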
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index a80c0673798f..e60c45fd3679 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -10,8 +10,13 @@ struct cpumask;
 struct mm_struct;
 
 #ifdef CONFIG_X86_UV
+#include <linux/efi.h>
 
 extern enum uv_system_type get_uv_system_type(void);
+static inline bool is_early_uv_system(void)
+{
+	return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+}
 extern int is_uv_system(void);
 extern int is_uv_hubless(void);
 extern void uv_cpu_init(void);
@@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 #else	/* X86_UV */
 
 static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
+static inline bool is_early_uv_system(void)	{ return 0; }
 static inline int is_uv_system(void)	{ return 0; }
 static inline int is_uv_hubless(void)	{ return 0; }
 static inline void uv_cpu_init(void)	{ }
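
The double negation in is_early_uv_system() is equivalent to requiring that
the EFI UV system table address is both non-zero and not the invalid-table
sentinel.  A sketch of the same predicate in positive form (the _alt name
is illustrative):

	static inline bool is_early_uv_system_alt(void)
	{
		return efi.uv_systab &&
		       efi.uv_systab != EFI_INVALID_TABLE_ADDR;
	}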
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 22ab408177b2..eeea634bee0a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c)
 static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 {
 	/* AMD errata T13 (order #21922) */
-	if ((c->x86 == 6)) {
+	if (c->x86 == 6) {
 		/* Duron Rev A0 */
 		if (c->x86_model == 3 && c->x86_stepping == 0)
 			size = 64;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6490f618e096..b52bd2b6cdb4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -26,6 +26,7 @@
 #include <asm/apic.h>
 #include <asm/intel-family.h>
 #include <asm/i8259.h>
+#include <asm/uv/uv.h>
 
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -1433,6 +1434,9 @@ void __init tsc_early_init(void)
 {
 	if (!boot_cpu_has(X86_FEATURE_TSC))
 		return;
+	/* Don't change UV TSC multi-chassis synchronization */
+	if (is_early_uv_system())
+		return;
 	if (!determine_cpu_tsc_frequencies(true))
 		return;
 	loops_per_jiffy = get_loops_per_jiffy();
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 235259011704..35edd61d1663 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -17,6 +17,7 @@
 #include <errno.h>
 #include <sched.h>
 #include <stdbool.h>
+#include <limits.h>
 
 #ifndef SYS_getcpu
 # ifdef __x86_64__
@@ -31,6 +32,14 @@
 
 int nerrs = 0;
 
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
 typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
 
 getcpu_t vgetcpu;
@@ -95,6 +104,15 @@ static void fill_function_pointers()
 		printf("Warning: failed to find getcpu in vDSO\n");
 
 	vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_clock_gettime)
+		printf("Warning: failed to find clock_gettime in vDSO\n");
+
+	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+	if (!vdso_gettimeofday)
+		printf("Warning: failed to find gettimeofday in vDSO\n");
+
 }
 
 static long sys_getcpu(unsigned * cpu, unsigned * node,
@@ -103,6 +121,16 @@ static long sys_getcpu(unsigned * cpu, unsigned * node,
 	return syscall(__NR_getcpu, cpu, node, cache);
 }
 
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+	return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	return syscall(__NR_gettimeofday, tv, tz);
+}
+
 static void test_getcpu(void)
 {
 	printf("[RUN]\tTesting getcpu...\n");
@@ -155,10 +183,154 @@ static void test_getcpu(void)
 	}
 }
 
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+	[0] = "CLOCK_REALTIME",
+	[1] = "CLOCK_MONOTONIC",
+	[2] = "CLOCK_PROCESS_CPUTIME_ID",
+	[3] = "CLOCK_THREAD_CPUTIME_ID",
+	[4] = "CLOCK_MONOTONIC_RAW",
+	[5] = "CLOCK_REALTIME_COARSE",
+	[6] = "CLOCK_MONOTONIC_COARSE",
+	[7] = "CLOCK_BOOTTIME",
+	[8] = "CLOCK_REALTIME_ALARM",
+	[9] = "CLOCK_BOOTTIME_ALARM",
+	[10] = "CLOCK_SGI_CYCLE",
+	[11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+	struct timespec start, vdso, end;
+	int vdso_ret, end_ret;
+
+	printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+	if (sys_clock_gettime(clock, &start) < 0) {
+		if (errno == EINVAL) {
+			vdso_ret = vdso_clock_gettime(clock, &vdso);
+			if (vdso_ret == -EINVAL) {
+				printf("[OK]\tNo such clock.\n");
+			} else {
+				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+				nerrs++;
+			}
+		} else {
+			printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+		}
+		return;
+	}
+
+	vdso_ret = vdso_clock_gettime(clock, &vdso);
+	end_ret = sys_clock_gettime(clock, &end);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_nsec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+	       (unsigned long long)end.tv_sec, end.tv_nsec);
+
+	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+}
+
+static void test_clock_gettime(void)
+{
+	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+	     clock++) {
+		test_one_clock_gettime(clock, clocknames[clock]);
+	}
+
+	/* Also test some invalid clock ids */
+	test_one_clock_gettime(-1, "invalid");
+	test_one_clock_gettime(INT_MIN, "invalid");
+	test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+	struct timeval start, vdso, end;
+	struct timezone sys_tz, vdso_tz;
+	int vdso_ret, end_ret;
+
+	if (!vdso_gettimeofday)
+		return;
+
+	printf("[RUN]\tTesting gettimeofday...\n");
+
+	if (sys_gettimeofday(&start, &sys_tz) < 0) {
+		printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+		nerrs++;
+		return;
+	}
+
+	vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+	end_ret = sys_gettimeofday(&end, NULL);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_usec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+	       (unsigned long long)end.tv_sec, end.tv_usec);
+
+	if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+
+	if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+	    sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+		printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+		       sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+	} else {
+		printf("[FAIL]\ttimezones do not match\n");
+		nerrs++;
+	}
+
+	/* And make sure that passing NULL for tz doesn't crash. */
+	vdso_gettimeofday(&vdso, NULL);
+}
+
 int main(int argc, char **argv)
 {
 	fill_function_pointers();
 
+	test_clock_gettime();
+	test_gettimeofday();
+
+	/*
+	 * Test getcpu() last so that, if something goes wrong setting affinity,
+	 * we still run the other tests.
+	 */
 	test_getcpu();
 
 	return nerrs ? 1 : 0;

* [GIT PULL] x86 fixes
@ 2018-09-15 13:24 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2018-09-15 13:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 002b87d2aace62b4f3841c3aa43309d2380092be x86/APM: Fix build warning when PROC_FS is not enabled

Misc fixes:

 - an EFI crash fix
 - Xen PV fixes
 - disable PTI on 2-level 32-bit kernels for now
 - a documentation fix

 Thanks,

	Ingo

------------------>
Boris Ostrovsky (1):
      x86/EISA: Don't probe EISA bus for Xen PV guests

Guenter Roeck (1):
      x86/efi: Load fixmap GDT in efi_call_phys_epilog() before setting %cr3

Joerg Roedel (1):
      Revert "x86/mm/legacy: Populate the user page-table with user pgd's"

Juergen Gross (1):
      x86/xen: Disable CPU0 hotplug for Xen PV

Randy Dunlap (2):
      x86/doc: Fix Documentation/x86/earlyprintk.txt
      x86/APM: Fix build warning when PROC_FS is not enabled


 Documentation/x86/earlyprintk.txt     | 25 +++++++++++++++----------
 arch/x86/include/asm/pgtable-2level.h |  9 ---------
 arch/x86/kernel/apm_32.c              |  2 ++
 arch/x86/kernel/eisa.c                | 10 ++++++++--
 arch/x86/kernel/topology.c            |  4 +++-
 arch/x86/platform/efi/efi_32.c        |  3 +--
 security/Kconfig                      |  2 +-
 7 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/Documentation/x86/earlyprintk.txt b/Documentation/x86/earlyprintk.txt
index 688e3eeed21d..46933e06c972 100644
--- a/Documentation/x86/earlyprintk.txt
+++ b/Documentation/x86/earlyprintk.txt
@@ -35,25 +35,25 @@ and two USB cables, connected like this:
 ( If your system does not list a debug port capability then you probably
   won't be able to use the USB debug key. )
 
- b.) You also need a Netchip USB debug cable/key:
+ b.) You also need a NetChip USB debug cable/key:
 
         http://www.plxtech.com/products/NET2000/NET20DC/default.asp
 
-     This is a small blue plastic connector with two USB connections,
+     This is a small blue plastic connector with two USB connections;
      it draws power from its USB connections.
 
  c.) You need a second client/console system with a high speed USB 2.0
      port.
 
- d.) The Netchip device must be plugged directly into the physical
+ d.) The NetChip device must be plugged directly into the physical
      debug port on the "host/target" system.  You cannot use a USB hub in
      between the physical debug port and the "host/target" system.
 
      The EHCI debug controller is bound to a specific physical USB
-     port and the Netchip device will only work as an early printk
+     port and the NetChip device will only work as an early printk
      device in this port.  The EHCI host controllers are electrically
      wired such that the EHCI debug controller is hooked up to the
-     first physical and there is no way to change this via software.
+     first physical port and there is no way to change this via software.
      You can find the physical port through experimentation by trying
      each physical port on the system and rebooting.  Or you can try
      and use lsusb or look at the kernel info messages emitted by the
@@ -65,9 +65,9 @@ and two USB cables, connected like this:
      to the hardware vendor, because there is no reason not to wire
      this port into one of the physically accessible ports.
 
- e.) It is also important to note, that many versions of the Netchip
+ e.) It is also important to note, that many versions of the NetChip
      device require the "client/console" system to be plugged into the
-     right and side of the device (with the product logo facing up and
+     right hand side of the device (with the product logo facing up and
      readable left to right).  The reason being is that the 5 volt
      power supply is taken from only one side of the device and it
      must be the side that does not get rebooted.
@@ -81,13 +81,18 @@ and two USB cables, connected like this:
       CONFIG_EARLY_PRINTK_DBGP=y
 
     And you need to add the boot command line: "earlyprintk=dbgp".
+
     (If you are using Grub, append it to the 'kernel' line in
-     /etc/grub.conf)
+     /etc/grub.conf.  If you are using Grub2 on a BIOS firmware system,
+     append it to the 'linux' line in /boot/grub2/grub.cfg. If you are
+     using Grub2 on an EFI firmware system, append it to the 'linux'
+     or 'linuxefi' line in /boot/grub2/grub.cfg or
+     /boot/efi/EFI/<distro>/grub.cfg.)
 
     On systems with more than one EHCI debug controller you must
     specify the correct EHCI debug controller number.  The ordering
     comes from the PCI bus enumeration of the EHCI controllers.  The
-    default with no number argument is "0" the first EHCI debug
+    default with no number argument is "0" or the first EHCI debug
     controller.  To use the second EHCI debug controller, you would
     use the command line: "earlyprintk=dbgp1"
 
@@ -111,7 +116,7 @@ and two USB cables, connected like this:
     see the raw output.
 
  c.) On Nvidia Southbridge based systems: the kernel will try to probe
-     and find out which port has debug device connected.
+     and find out which port has a debug device connected.
 
 3. Testing that it works fine:
 
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 24c6cf5f16b7..60d0f9015317 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -19,9 +19,6 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte)
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-	pmd.pud.p4d.pgd = pti_set_user_pgtbl(&pmdp->pud.p4d.pgd, pmd.pud.p4d.pgd);
-#endif
 	*pmdp = pmd;
 }
 
@@ -61,9 +58,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
 #ifdef CONFIG_SMP
 static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 {
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-	pti_set_user_pgtbl(&xp->pud.p4d.pgd, __pgd(0));
-#endif
 	return __pmd(xchg((pmdval_t *)xp, 0));
 }
 #else
@@ -73,9 +67,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 #ifdef CONFIG_SMP
 static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 {
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-	pti_set_user_pgtbl(&xp->p4d.pgd, __pgd(0));
-#endif
 	return __pud(xchg((pudval_t *)xp, 0));
 }
 #else
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index ec00d1ff5098..f7151cd03cb0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1640,6 +1640,7 @@ static int do_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+#ifdef CONFIG_PROC_FS
 static int proc_apm_show(struct seq_file *m, void *v)
 {
 	unsigned short	bx;
@@ -1719,6 +1720,7 @@ static int proc_apm_show(struct seq_file *m, void *v)
 		   units);
 	return 0;
 }
+#endif
 
 static int apm(void *unused)
 {
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index f260e452e4f8..e8c8c5d78dbd 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -7,11 +7,17 @@
 #include <linux/eisa.h>
 #include <linux/io.h>
 
+#include <xen/xen.h>
+
 static __init int eisa_bus_probe(void)
 {
-	void __iomem *p = ioremap(0x0FFFD9, 4);
+	void __iomem *p;
+
+	if (xen_pv_domain() && !xen_initial_domain())
+		return 0;
 
-	if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+	p = ioremap(0x0FFFD9, 4);
+	if (p && readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24))
 		EISA_bus = 1;
 	iounmap(p);
 	return 0;
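
Besides skipping the probe for Xen PV domUs, the hunk above also starts
checking the ioremap() return value before dereferencing it, since
ioremap() can fail and return NULL.  A minimal sketch of that defensive
idiom (address and magic value as in the patch):

	void __iomem *p = ioremap(0x0FFFD9, 4);

	if (p) {
		if (readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24))
			EISA_bus = 1;
		iounmap(p);
	}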
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 12cbe2b88c0f..738bf42b0218 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -111,8 +111,10 @@ int arch_register_cpu(int num)
 	/*
 	 * Currently CPU0 is only hotpluggable on Intel platforms. Other
 	 * vendors can add hotplug support later.
+	 * Xen PV guests don't support CPU0 hotplug at all.
 	 */
-	if (c->x86_vendor != X86_VENDOR_INTEL)
+	if (c->x86_vendor != X86_VENDOR_INTEL ||
+	    boot_cpu_has(X86_FEATURE_XENPV))
 		cpu0_hotpluggable = 0;
 
 	/*
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 05ca14222463..9959657127f4 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -85,10 +85,9 @@ pgd_t * __init efi_call_phys_prolog(void)
 
 void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
+	load_fixmap_gdt(0);
 	load_cr3(save_pgd);
 	__flush_tlb_all();
-
-	load_fixmap_gdt(0);
 }
 
 void __init efi_runtime_update_mappings(void)
diff --git a/security/Kconfig b/security/Kconfig
index 27d8b2688f75..d9aa521b5206 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -57,7 +57,7 @@ config SECURITY_NETWORK
 config PAGE_TABLE_ISOLATION
 	bool "Remove the kernel mapping in user mode"
 	default y
-	depends on X86 && !UML
+	depends on (X86_64 || X86_PAE) && !UML
 	help
 	  This feature reduces the number of hardware side channels by
 	  ensuring that the majority of kernel addresses are not mapped

* [GIT PULL] x86 fixes
@ 2018-07-30 17:59 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2018-07-30 17:59 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 92a4728608a8fd228c572bc8ff50dd98aa0ddf2a x86/boot: Fix if_changed build flip/flop bug

Misc fixes:

 - a build race fix
 - a Xen entry fix
 - a TSC_DEADLINE quirk future-proofing fix
 
 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/entry/64: Remove %ebx handling from error_entry/exit

Kees Cook (1):
      x86/boot: Fix if_changed build flip/flop bug

Len Brown (1):
      x86/apic: Future-proof the TSC_DEADLINE quirk for SKX


 arch/x86/boot/compressed/Makefile |  8 ++++++--
 arch/x86/entry/entry_64.S         | 18 ++++--------------
 arch/x86/kernel/apic/apic.c       |  3 +++
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fa42f895fdde..169c2feda14a 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -106,9 +106,13 @@ define cmd_check_data_rel
 	done
 endef
 
+# We need to run two commands under "if_changed", so merge them into a
+# single invocation.
+quiet_cmd_check-and-link-vmlinux = LD      $@
+      cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld)
+
 $(obj)/vmlinux: $(vmlinux-objs-y) FORCE
-	$(call if_changed,check_data_rel)
-	$(call if_changed,ld)
+	$(call if_changed,check-and-link-vmlinux)
 
 OBJCOPYFLAGS_vmlinux.bin :=  -R .comment -S
 $(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73a522d53b53..8ae7ffda8f98 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -981,7 +981,7 @@ ENTRY(\sym)
 
 	call	\do_sym
 
-	jmp	error_exit			/* %ebx: no swapgs flag */
+	jmp	error_exit
 	.endif
 END(\sym)
 .endm
@@ -1222,7 +1222,6 @@ END(paranoid_exit)
 
 /*
  * Save all registers in pt_regs, and switch GS if needed.
- * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
 	UNWIND_HINT_FUNC
@@ -1269,7 +1268,6 @@ ENTRY(error_entry)
 	 * for these here too.
 	 */
 .Lerror_kernelspace:
-	incl	%ebx
 	leaq	native_irq_return_iret(%rip), %rcx
 	cmpq	%rcx, RIP+8(%rsp)
 	je	.Lerror_bad_iret
@@ -1303,28 +1301,20 @@ ENTRY(error_entry)
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
-	 * as if we faulted immediately after IRET and clear EBX so that
-	 * error_exit knows that we will be returning to user mode.
+	 * as if we faulted immediately after IRET.
 	 */
 	mov	%rsp, %rdi
 	call	fixup_bad_iret
 	mov	%rax, %rsp
-	decl	%ebx
 	jmp	.Lerror_entry_from_usermode_after_swapgs
 END(error_entry)
 
-
-/*
- * On entry, EBX is a "return to kernel mode" flag:
- *   1: already in kernel mode, don't need SWAPGS
- *   0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
- */
 ENTRY(error_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	testl	%ebx, %ebx
-	jnz	retint_kernel
+	testb	$3, CS(%rsp)
+	jz	retint_kernel
 	jmp	retint_user
 END(error_exit)
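
The replacement test reads the privilege level straight out of the saved CS
selector: the low two bits of CS hold the CPL, which is 3 for user mode and
0 for the kernel, so no scratch register has to carry a flag across the C
call any more.  What "testb $3, CS(%rsp)" computes, expressed as a C sketch
(the helper name is illustrative):

	static inline bool came_from_user_mode(struct pt_regs *regs)
	{
		return (regs->cs & 3) != 0;	/* CPL 3 => user mode */
	}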
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2aabd4cb0e3f..adbda5847b14 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,6 +573,9 @@ static u32 skx_deadline_rev(void)
 	case 0x04: return 0x02000014;
 	}
 
+	if (boot_cpu_data.x86_stepping > 4)
+		return 0;
+
 	return ~0U;
 }
 

* [GIT PULL] x86 fixes
@ 2018-06-30  8:49 Ingo Molnar
  2018-06-30 19:01 ` Linus Torvalds
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-06-30  8:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: d79d0d8ad0cb3d782b41631dfeac8eb05e414bcd x86/mm: Clean up the printk()s in show_fault_oops()

The biggest diffstat comes from self-test updates; beyond those there are entry 
code fixes, 5-level paging fixes, console debug output fixes and other misc fixes.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  tools/testing/selftests/x86/sigreturn.c# e8a445dea219: selftests/x86/sigreturn: Do 
                                   # ec3480205660: selftests/x86/sigreturn/64: 

 Thanks,

	Ingo

------------------>
Andrey Ryabinin (1):
      x86/mm: Don't free P4D table when it is folded at runtime

Andy Lutomirski (3):
      x86/entry/64/compat: Fix "x86/entry/64/compat: Preserve r8-r11 in int $0x80"
      selftests/x86/sigreturn/64: Fix spurious failures on AMD CPUs
      selftests/x86/sigreturn: Do minor cleanups

Dmitry Vyukov (2):
      x86/mm: Get rid of KERN_CONT in show_fault_oops()
      x86/mm: Clean up the printk()s in show_fault_oops()

Jan Beulich (1):
      x86/entry/32: Add explicit 'l' instruction suffix

Kirill A. Shutemov (2):
      x86/efi: Fix efi_call_phys_epilog() with CONFIG_X86_5LEVEL=y
      x86/mm: Drop unneeded __always_inline for p4d page table helpers


 arch/x86/entry/entry_32.S               |  2 +-
 arch/x86/entry/entry_64_compat.S        | 16 ++++-----
 arch/x86/include/asm/pgalloc.h          |  3 ++
 arch/x86/include/asm/pgtable.h          |  2 +-
 arch/x86/include/asm/pgtable_64.h       |  4 +--
 arch/x86/mm/fault.c                     | 21 ++++--------
 arch/x86/platform/efi/efi_64.c          |  4 +--
 tools/testing/selftests/x86/sigreturn.c | 59 ++++++++++++++++++++-------------
 8 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2582881d19ce..c371bfee137a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32)
 	 * whereas POPF does not.)
 	 */
 	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
-	btr	$X86_EFLAGS_IF_BIT, (%esp)
+	btrl	$X86_EFLAGS_IF_BIT, (%esp)
 	popfl
 
 	/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 9de7f1e1dede..7d0df78db727 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq   %r8			/* pt_regs->r8 */
+	pushq   $0			/* pt_regs->r8  = 0 */
 	xorl	%r8d, %r8d		/* nospec   r8 */
-	pushq   %r9			/* pt_regs->r9 */
+	pushq   $0			/* pt_regs->r9  = 0 */
 	xorl	%r9d, %r9d		/* nospec   r9 */
-	pushq   %r10			/* pt_regs->r10 */
+	pushq   $0			/* pt_regs->r10 = 0 */
 	xorl	%r10d, %r10d		/* nospec   r10 */
-	pushq   %r11			/* pt_regs->r11 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
@@ -374,13 +374,13 @@ ENTRY(entry_INT80_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	xorl	%ecx, %ecx		/* nospec   cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   %r8			/* pt_regs->r8 */
 	xorl	%r8d, %r8d		/* nospec   r8 */
-	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   %r9			/* pt_regs->r9 */
 	xorl	%r9d, %r9d		/* nospec   r9 */
-	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   %r10			/* pt_regs->r10*/
 	xorl	%r10d, %r10d		/* nospec   r10 */
-	pushq   $0			/* pt_regs->r11 = 0 */
+	pushq   %r11			/* pt_regs->r11 */
 	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index ada6410fd2ec..fbd578daa66e 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -184,6 +184,9 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
 
 static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 {
+	if (!pgtable_l5_enabled())
+		return;
+
 	BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
 	free_page((unsigned long)p4d);
 }
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 99ecde23c3ec..5715647fc4fe 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -898,7 +898,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
 #define pgd_page(pgd)	pfn_to_page(pgd_pfn(pgd))
 
 /* to find an entry in a page-table-directory. */
-static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 {
 	if (!pgtable_l5_enabled())
 		return (p4d_t *)pgd;
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 0fdcd21dadbd..3c5385f9a88f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -216,7 +216,7 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
 }
 #endif
 
-static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
 	pgd_t pgd;
 
@@ -230,7 +230,7 @@ static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 	*p4dp = native_make_p4d(native_pgd_val(pgd));
 }
 
-static __always_inline void native_p4d_clear(p4d_t *p4d)
+static inline void native_p4d_clear(p4d_t *p4d)
 {
 	native_set_p4d(p4d, native_make_p4d(0));
 }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9a84a0d08727..2aafa6ab6103 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -641,11 +641,6 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
-static const char nx_warning[] = KERN_CRIT
-"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
-static const char smep_warning[] = KERN_CRIT
-"unable to execute userspace code (SMEP?) (uid: %d)\n";
-
 static void
 show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		unsigned long address)
@@ -664,20 +659,18 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		pte = lookup_address_in_pgd(pgd, address, &level);
 
 		if (pte && pte_present(*pte) && !pte_exec(*pte))
-			printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
+			pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n",
+				from_kuid(&init_user_ns, current_uid()));
 		if (pte && pte_present(*pte) && pte_exec(*pte) &&
 				(pgd_flags(*pgd) & _PAGE_USER) &&
 				(__read_cr4() & X86_CR4_SMEP))
-			printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
+			pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n",
+				from_kuid(&init_user_ns, current_uid()));
 	}
 
-	printk(KERN_ALERT "BUG: unable to handle kernel ");
-	if (address < PAGE_SIZE)
-		printk(KERN_CONT "NULL pointer dereference");
-	else
-		printk(KERN_CONT "paging request");
-
-	printk(KERN_CONT " at %px\n", (void *) address);
+	pr_alert("BUG: unable to handle kernel %s at %px\n",
+		 address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
+		 (void *)address);
 
 	dump_pagetable(address);
 }
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index e01f7ceb9e7a..77873ce700ae 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -166,14 +166,14 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
 		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
 
-		if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+		if (!pgd_present(*pgd))
 			continue;
 
 		for (i = 0; i < PTRS_PER_P4D; i++) {
 			p4d = p4d_offset(pgd,
 					 pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
 
-			if (!(p4d_val(*p4d) & _PAGE_PRESENT))
+			if (!p4d_present(*p4d))
 				continue;
 
 			pud = (pud_t *)p4d_page_vaddr(*p4d);
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 246145b84a12..4d9dc3f2fd70 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -610,21 +610,41 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 	 */
 	for (int i = 0; i < NGREG; i++) {
 		greg_t req = requested_regs[i], res = resulting_regs[i];
+
 		if (i == REG_TRAPNO || i == REG_IP)
 			continue;	/* don't care */
-		if (i == REG_SP) {
-			printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
-			       (unsigned long long)res);
 
+		if (i == REG_SP) {
 			/*
-			 * In many circumstances, the high 32 bits of rsp
-			 * are zeroed.  For example, we could be a real
-			 * 32-bit program, or we could hit any of a number
-			 * of poorly-documented IRET or segmented ESP
-			 * oddities.  If this happens, it's okay.
+			 * If we were using a 16-bit stack segment, then
+			 * the kernel is a bit stuck: IRET only restores
+			 * the low 16 bits of ESP/RSP if SS is 16-bit.
+			 * The kernel uses a hack to restore bits 31:16,
+			 * but that hack doesn't help with bits 63:32.
+			 * On Intel CPUs, bits 63:32 end up zeroed, and, on
+			 * AMD CPUs, they leak the high bits of the kernel
+			 * espfix64 stack pointer.  There's very little that
+			 * the kernel can do about it.
+			 *
+			 * Similarly, if we are returning to a 32-bit context,
+			 * the CPU will often lose the high 32 bits of RSP.
 			 */
-			if (res == (req & 0xFFFFFFFF))
-				continue;  /* OK; not expected to work */
+
+			if (res == req)
+				continue;
+
+			if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
+				printf("[NOTE]\tSP: %llx -> %llx\n",
+				       (unsigned long long)req,
+				       (unsigned long long)res);
+				continue;
+			}
+
+			printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
+			       (unsigned long long)requested_regs[i],
+			       (unsigned long long)resulting_regs[i]);
+			nerrs++;
+			continue;
 		}
 
 		bool ignore_reg = false;
@@ -654,25 +674,18 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 #endif
 
 		/* Sanity check on the kernel */
-		if (i == REG_CX && requested_regs[i] != resulting_regs[i]) {
+		if (i == REG_CX && req != res) {
 			printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
-			       (unsigned long long)requested_regs[i],
-			       (unsigned long long)resulting_regs[i]);
+			       (unsigned long long)req,
+			       (unsigned long long)res);
 			nerrs++;
 			continue;
 		}
 
-		if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
-			/*
-			 * SP is particularly interesting here.  The
-			 * usual cause of failures is that we hit the
-			 * nasty IRET case of returning to a 16-bit SS,
-			 * in which case bits 16:31 of the *kernel*
-			 * stack pointer persist in ESP.
-			 */
+		if (req != res && !ignore_reg) {
 			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
-			       i, (unsigned long long)requested_regs[i],
-			       (unsigned long long)resulting_regs[i]);
+			       i, (unsigned long long)req,
+			       (unsigned long long)res);
 			nerrs++;
 		}
 	}

* [GIT PULL] x86 fixes
@ 2018-03-31 10:36 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2018-03-31 10:36 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: bd47a85acd727e27b7283daff557865ad04c59f6 x86/platform/UV: Fix critical UV MMR address error

Two UV platform fixes and a kbuild fix.

 Thanks,

	Ingo

------------------>
Andrew Banman (1):
      x86/platform/uv/BAU: Add APIC idt entry

Sven Wegener (1):
      x86/purgatory: Avoid creating stray .<pid>.d files, remove -MD from KBUILD_CFLAGS

mike.travis@hpe.com (1):
      x86/platform/UV: Fix critical UV MMR address error


 arch/x86/include/asm/hw_irq.h     | 1 +
 arch/x86/include/asm/uv/uv_mmrs.h | 2 +-
 arch/x86/kernel/idt.c             | 3 +++
 arch/x86/platform/uv/tlb_uv.c     | 2 --
 arch/x86/purgatory/Makefile       | 2 +-
 5 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 2851077b6051..32e666e1231e 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
 extern asmlinkage void kvm_posted_intr_nested_ipi(void);
 extern asmlinkage void error_interrupt(void);
 extern asmlinkage void irq_work_interrupt(void);
+extern asmlinkage void uv_bau_message_intr1(void);
 
 extern asmlinkage void spurious_interrupt(void);
 extern asmlinkage void thermal_interrupt(void);
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index ecb9ddef128f..62c79e26a59a 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3833,7 +3833,7 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
 #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL
 #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR (				\
 	is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR :		\
 	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR :		\
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 56d99be3706a..5d039c848bd9 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -140,6 +140,9 @@ static const __initconst struct idt_data apic_idts[] = {
 # ifdef CONFIG_IRQ_WORK
 	INTG(IRQ_WORK_VECTOR,		irq_work_interrupt),
 # endif
+#ifdef CONFIG_X86_UV
+	INTG(UV_BAU_MESSAGE,		uv_bau_message_intr1),
+#endif
 	INTG(SPURIOUS_APIC_VECTOR,	spurious_interrupt),
 	INTG(ERROR_APIC_VECTOR,		error_interrupt),
 #endif
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index db77e087adaf..b36caae0fb2f 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2255,8 +2255,6 @@ static int __init uv_bau_init(void)
 			init_uvhub(uvhub, vector, uv_base_pnode);
 	}
 
-	alloc_intr_gate(vector, uv_bau_message_intr1);
-
 	for_each_possible_blade(uvhub) {
 		if (uv_blade_nr_possible_cpus(uvhub)) {
 			unsigned long val;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 2f15a2ac4209..d70c15de417b 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -16,7 +16,7 @@ KCOV_INSTRUMENT := n
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
 # sure how to relocate those. Like kexec-tools, use custom flags.
 
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large
 KBUILD_CFLAGS += -m$(BITS)
 KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
 

* [GIT PULL] x86 fixes
@ 2018-02-15  0:45 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2018-02-15  0:45 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: fd0e786d9d09024f67bd71ec094b110237dc3840 x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages

Misc fixes all over the map:

 - /proc/kcore vsyscall related fixes
 - LTO fix
 - build warning fix
 - CPU hotplug fix
 - Kconfig NR_CPUS cleanups
 - cpu_has() cleanups/robustification
 - .gitignore fix
 - memory-failure unmapping fix
 - UV platform fix

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  fs/proc/kcore.c                    # 595dd46ebfc1: vfs/proc/kcore, x86/mm/kcore
  include/linux/kcore.h              # 595dd46ebfc1: vfs/proc/kcore, x86/mm/kcore
  include/linux/mm_inline.h          # fd0e786d9d09: x86/mm, mm/hwpoison: Don't u
  mm/memory-failure.c                # fd0e786d9d09: x86/mm, mm/hwpoison: Don't u

 Thanks,

	Ingo

------------------>
Arnd Bergmann (1):
      x86/error_inject: Make just_return_func() globally visible

Borislav Petkov (1):
      x86/MCE: Fix build warning introduced by "x86: do not use print_symbol()"

Ingo Molnar (1):
      x86/Kconfig: Further simplify the NR_CPUS config

Jia Zhang (2):
      vfs/proc/kcore, x86/mm/kcore: Fix SMAP fault when dumping vsyscall user page
      x86/mm/kcore: Add vsyscall page to /proc/kcore conditionally

Masayoshi Mizuma (1):
      x86/smpboot: Fix uncore_pci_remove() indexing bug when hot-removing a physical CPU

Peter Zijlstra (2):
      x86/cpufeature: Reindent _static_cpu_has()
      x86/cpufeature: Update _static_cpu_has() to use all named variables

Progyan Bhattacharya (1):
      x86/build: Add arch/x86/tools/insn_decoder_test to .gitignore

Randy Dunlap (1):
      x86/Kconfig: Simplify NR_CPUS config

Tony Luck (1):
      x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages

mike.travis@hpe.com (1):
      x86/platform/UV: Fix GAM Range Table entries less than 1GB


 arch/x86/.gitignore                       |  1 +
 arch/x86/Kconfig                          | 75 ++++++++++++++++++++++-------
 arch/x86/include/asm/cpufeature.h         | 79 ++++++++++++++++---------------
 arch/x86/include/asm/page_64.h            |  4 --
 arch/x86/kernel/apic/x2apic_uv_x.c        | 15 ++++--
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 ++++++
 arch/x86/kernel/cpu/mcheck/mce.c          | 19 +++++---
 arch/x86/kernel/smpboot.c                 |  1 -
 arch/x86/lib/error-inject.c               |  1 +
 arch/x86/mm/init_64.c                     |  4 +-
 fs/proc/kcore.c                           |  4 ++
 include/linux/kcore.h                     |  1 +
 include/linux/mm_inline.h                 |  6 ---
 mm/memory-failure.c                       |  2 -
 14 files changed, 146 insertions(+), 81 deletions(-)

diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index aff152c87cf4..5a82bac5e0bc 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,6 +1,7 @@
 boot/compressed/vmlinux
 tools/test_get_len
 tools/insn_sanity
+tools/insn_decoder_test
 purgatory/kexec-purgatory.c
 purgatory/purgatory.ro
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 63bf349b2b24..a528c14d45a5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -423,12 +423,6 @@ config X86_MPPARSE
 	  For old smp systems that do not have proper acpi support. Newer systems
 	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
 
-config X86_BIGSMP
-	bool "Support for big SMP systems with more than 8 CPUs"
-	depends on X86_32 && SMP
-	---help---
-	  This option is needed for the systems that have more than 8 CPUs
-
 config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
@@ -460,6 +454,12 @@ config INTEL_RDT
 	  Say N if unsure.
 
 if X86_32
+config X86_BIGSMP
+	bool "Support for big SMP systems with more than 8 CPUs"
+	depends on SMP
+	---help---
+	  This option is needed for the systems that have more than 8 CPUs
+
 config X86_EXTENDED_PLATFORM
 	bool "Support for extended (non-PC) x86 platforms"
 	default y
@@ -949,25 +949,66 @@ config MAXSMP
 	  Enable maximum number of CPUS and NUMA Nodes for this architecture.
 	  If unsure, say N.
 
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+#   interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+	int
+	default NR_CPUS_RANGE_END if MAXSMP
+	default    1 if !SMP
+	default    2
+
+config NR_CPUS_RANGE_END
+	int
+	depends on X86_32
+	default   64 if  SMP &&  X86_BIGSMP
+	default    8 if  SMP && !X86_BIGSMP
+	default    1 if !SMP
+
+config NR_CPUS_RANGE_END
+	int
+	depends on X86_64
+	default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
+	default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+	default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+	int
+	depends on X86_32
+	default   32 if  X86_BIGSMP
+	default    8 if  SMP
+	default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+	int
+	depends on X86_64
+	default 8192 if  MAXSMP
+	default   64 if  SMP
+	default    1 if !SMP
+
 config NR_CPUS
 	int "Maximum number of CPUs" if SMP && !MAXSMP
-	range 2 8 if SMP && X86_32 && !X86_BIGSMP
-	range 2 64 if SMP && X86_32 && X86_BIGSMP
-	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
-	range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
-	default "1" if !SMP
-	default "8192" if MAXSMP
-	default "32" if SMP && X86_BIGSMP
-	default "8" if SMP && X86_32
-	default "64" if SMP
+	range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+	default NR_CPUS_DEFAULT
 	---help---
 	  This allows you to specify the maximum number of CPUs which this
 	  kernel will support.  If CPUMASK_OFFSTACK is enabled, the maximum
 	  supported value is 8192, otherwise the maximum value is 512.  The
 	  minimum value which makes sense is 2.
 
-	  This is purely to save memory - each supported CPU adds
-	  approximately eight kilobytes to the kernel image.
+	  This is purely to save memory: each supported CPU adds about 8KB
+	  to the kernel image.
 
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 70eddb3922ff..736771c9822e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-		asm_volatile_goto("1: jmp 6f\n"
-			 "2:\n"
-			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-			         "((5f-4f) - (2b-1b)),0x90\n"
-			 "3:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"		/* src offset */
-			 " .long 4f - .\n"		/* repl offset */
-			 " .word %P1\n"			/* always replace */
-			 " .byte 3b - 1b\n"		/* src len */
-			 " .byte 5f - 4f\n"		/* repl len */
-			 " .byte 3b - 2b\n"		/* pad len */
-			 ".previous\n"
-			 ".section .altinstr_replacement,\"ax\"\n"
-			 "4: jmp %l[t_no]\n"
-			 "5:\n"
-			 ".previous\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"		/* src offset */
-			 " .long 0\n"			/* no replacement */
-			 " .word %P0\n"			/* feature bit */
-			 " .byte 3b - 1b\n"		/* src len */
-			 " .byte 0\n"			/* repl len */
-			 " .byte 0\n"			/* pad len */
-			 ".previous\n"
-			 ".section .altinstr_aux,\"ax\"\n"
-			 "6:\n"
-			 " testb %[bitnum],%[cap_byte]\n"
-			 " jnz %l[t_yes]\n"
-			 " jmp %l[t_no]\n"
-			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
-			     [bitnum] "i" (1 << (bit & 7)),
-			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
-			 : : t_yes, t_no);
-	t_yes:
-		return true;
-	t_no:
-		return false;
+	asm_volatile_goto("1: jmp 6f\n"
+		 "2:\n"
+		 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+			 "((5f-4f) - (2b-1b)),0x90\n"
+		 "3:\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 4f - .\n"		/* repl offset */
+		 " .word %P[always]\n"		/* always replace */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 5f - 4f\n"		/* repl len */
+		 " .byte 3b - 2b\n"		/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_replacement,\"ax\"\n"
+		 "4: jmp %l[t_no]\n"
+		 "5:\n"
+		 ".previous\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 0\n"			/* no replacement */
+		 " .word %P[feature]\n"		/* feature bit */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 0\n"			/* repl len */
+		 " .byte 0\n"			/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_aux,\"ax\"\n"
+		 "6:\n"
+		 " testb %[bitnum],%[cap_byte]\n"
+		 " jnz %l[t_yes]\n"
+		 " jmp %l[t_no]\n"
+		 ".previous\n"
+		 : : [feature]  "i" (bit),
+		     [always]   "i" (X86_FEATURE_ALWAYS),
+		     [bitnum]   "i" (1 << (bit & 7)),
+		     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+		 : : t_yes, t_no);
+t_yes:
+	return true;
+t_no:
+	return false;
 }
 
 #define static_cpu_has(bit)					\
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bceb232..d652a3808065 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 46b675aaf20b..f11910b44638 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 
 	uv_gre_table = gre;
 	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+		unsigned long size = ((unsigned long)(gre->limit - lgre)
+					<< UV_GAM_RANGE_SHFT);
+		int order = 0;
+		char suffix[] = " KMGTPE";
+
+		while (size > 9999 && order < sizeof(suffix)) {
+			size /= 1024;
+			order++;
+		}
+
 		if (!index) {
 			pr_info("UV: GAM Range Table...\n");
 			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
 		}
-		pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
+		pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d   %04x  %02x %02x\n",
 			index++,
 			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
 			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
-			((unsigned long)(gre->limit - lgre)) >>
-				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+			size, suffix[order],
 			gre->type, gre->nasid, gre->sockid, gre->pnode);
 
 		lgre = gre->limit;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..e956eb267061 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 
 extern struct mca_config mca_cfg;
 
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ *    are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ *    recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
 #endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3a8e88a611eb..8ff94d1e2dce 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -234,7 +238,7 @@ static void __print_mce(struct mce *m)
 			m->cs, m->ip);
 
 		if (m->cs == __KERNEL_CS)
-			pr_cont("{%pS}", (void *)m->ip);
+			pr_cont("{%pS}", (void *)(unsigned long)m->ip);
 		pr_cont("\n");
 	}
 
@@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 
 	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
 		pfn = mce->addr >> PAGE_SHIFT;
-		memory_failure(pfn, 0);
+		if (!memory_failure(pfn, 0))
+			mce_unmap_kpfn(pfn);
 	}
 
 	return NOTIFY_OK;
@@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
 	ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
 	if (ret)
 		pr_err("Memory error not recovered");
+	else
+		mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
 	return ret;
 }
 
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
 {
 	unsigned long decoy_addr;
 
@@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
 	 * We would like to just call:
 	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
 	 * but doing that would radically increase the odds of a
-	 * speculative access to the posion page because we'd have
+	 * speculative access to the poison page because we'd have
 	 * the virtual address of the kernel 1:1 mapping sitting
 	 * around in registers.
 	 * Instead we get tricky.  We create a non-canonical address
@@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn)
 
 	if (set_memory_np(decoy_addr, 1))
 		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
 }
 #endif
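
The comment in the hunk above describes the trick: instead of unmapping the
poison page through its canonical 1:1 virtual address (which would leave
that address sitting in registers, inviting speculative loads), the kernel
builds a non-canonical alias that still selects the same page table entry.
A hedged sketch of one plausible construction; the exact bit flipped is an
assumption here, not taken from the hunk:

	/* Flip a high bit of the direct-map address: the result is
	 * non-canonical, yet walks to the same PTE for set_memory_np().
	 * (Assumed construction, for illustration.)
	 */
	unsigned long decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));

	if (set_memory_np(decoy_addr, 1))
		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);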
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f27facbaa9b..cfc61e1d45e2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1430,7 +1430,6 @@ static void remove_siblinginfo(int cpu)
 	cpumask_clear(cpu_llc_shared_mask(cpu));
 	cpumask_clear(topology_sibling_cpumask(cpu));
 	cpumask_clear(topology_core_cpumask(cpu));
-	c->phys_proc_id = 0;
 	c->cpu_core_id = 0;
 	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
 	recompute_smt_state();
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 7b881d03d0dd..3cdf06128d13 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);
 
 asm(
 	".type just_return_func, @function\n"
+	".globl just_return_func\n"
 	"just_return_func:\n"
 	"	ret\n"
 	".size just_return_func, .-just_return_func\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ab42c852069..8ba9c3128947 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1193,8 +1193,8 @@ void __init mem_init(void)
 	register_page_bootmem_info();
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
-			 PAGE_SIZE, KCORE_OTHER);
+	if (get_gate_vma(&init_mm))
+		kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
 
 	mem_init_print_info(NULL);
 }
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e8a93bc8285d..d1e82761de81 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -510,6 +510,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 			/* we have to zero-fill user buffer even if no read */
 			if (copy_to_user(buffer, buf, tsz))
 				return -EFAULT;
+		} else if (m->type == KCORE_USER) {
+			/* User page is handled prior to normal kernel page: */
+			if (copy_to_user(buffer, (char *)start, tsz))
+				return -EFAULT;
 		} else {
 			if (kern_addr_valid(start)) {
 				/*
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 7ff25a808fef..80db19d3a505 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -10,6 +10,7 @@ enum kcore_type {
 	KCORE_VMALLOC,
 	KCORE_RAM,
 	KCORE_VMEMMAP,
+	KCORE_USER,
 	KCORE_OTHER,
 };
 
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c30b32e3c862..10191c28fc04 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page)
 
 #define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
 
-#ifdef arch_unmap_kpfn
-extern void arch_unmap_kpfn(unsigned long pfn);
-#else
-static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
-#endif
-
 #endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4b80ccee4535..8291b75f42c8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1139,8 +1139,6 @@ int memory_failure(unsigned long pfn, int flags)
 		return 0;
 	}
 
-	arch_unmap_kpfn(pfn);
-
 	orig_head = hpage = compound_head(p);
 	num_poisoned_pages_inc();
 

* [GIT PULL] x86 fixes
@ 2018-01-17 15:41 Ingo Molnar
  2018-01-17 20:35 ` Linus Torvalds
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2018-01-17 15:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 45d55e7bac4028af93f5fa324e69958a0b868e96 x86/apic/vector: Fix off by one in error path

Misc fixes:

 - A rather involved set of memory hardware encryption fixes to support the early 
   loading of microcode files via the initrd. These are larger than what we 
   normally take at such a late -rc stage, but there are two mitigating factors: 
   1) most of the changes are limited to the SME code itself; 2) the ability to 
   load microcode early has increased importance in the post-Meltdown/Spectre 
   era. (A userspace SME/SEV-detection sketch follows this list.)

 - An IRQ vector allocator fix

 - An Intel RDT driver use-after-free fix

 - An APIC driver bug fix/revert to make certain older systems boot again

 - A pkeys ABI fix

 - TSC calibration fixes

 - A kdump fix
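
Whether a CPU advertises SME/SEV at all is enumerated in CPUID Fn8000_001F;
as a quick userspace sketch (not part of this pull, and no substitute for
the kernel's own sme_enable() logic):

  #include <cpuid.h>
  #include <stdio.h>

  /*
   * CPUID Fn8000_001F reports AMD memory-encryption capabilities:
   * EAX bit 0 = SME, EAX bit 1 = SEV, EBX[5:0] = C-bit position.
   */
  int main(void)
  {
          unsigned int eax, ebx, ecx, edx;

          if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
                  return 1;       /* leaf not supported */

          printf("SME: %s  SEV: %s  C-bit: %u\n",
                 (eax & 1) ? "yes" : "no",
                 (eax & 2) ? "yes" : "no",
                 ebx & 0x3f);
          return 0;
  }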

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/crash_core.h         # 9f15b9120f56: kdump: Write the correct add
  kernel/crash_core.c                # 9f15b9120f56: kdump: Write the correct add

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      x86/idt: Mark IDT tables __initconst

Eric W. Biederman (1):
      x86/mm/pkeys: Fix fill_sig_info_pkey

Kirill A. Shutemov (1):
      kdump: Write the correct address of mem_section into vmcoreinfo

Len Brown (3):
      x86/tsc: Future-proof native_calibrate_tsc()
      x86/tsc: Fix erroneous TSC rate on Skylake Xeon
      x86/tsc: Print tsc_khz, when it differs from cpu_khz

Thomas Gleixner (2):
      x86/intel_rdt/cqm: Prevent use after free
      x86/apic/vector: Fix off by one in error path

Tom Lendacky (5):
      x86/mm: Clean up register saving in the __enc_copy() assembly code
      x86/mm: Use a struct to reduce parameters for SME PGD mapping
      x86/mm: Centralize PMD flags in sme_encrypt_kernel()
      x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption
      x86/mm: Encrypt the initrd earlier for BSP microcode update

Ville Syrjälä (1):
      Revert "x86/apic: Remove init_bsp_APIC()"


 arch/x86/include/asm/apic.h        |   1 +
 arch/x86/include/asm/mem_encrypt.h |   4 +-
 arch/x86/kernel/apic/apic.c        |  49 +++++
 arch/x86/kernel/apic/vector.c      |   7 +-
 arch/x86/kernel/cpu/intel_rdt.c    |   8 +-
 arch/x86/kernel/head64.c           |   4 +-
 arch/x86/kernel/idt.c              |  12 +-
 arch/x86/kernel/irqinit.c          |   3 +
 arch/x86/kernel/setup.c            |  10 --
 arch/x86/kernel/tsc.c              |   9 +-
 arch/x86/mm/fault.c                |   7 +-
 arch/x86/mm/mem_encrypt.c          | 356 +++++++++++++++++++++++++++----------
 arch/x86/mm/mem_encrypt_boot.S     |  80 +++++----
 include/linux/crash_core.h         |   2 +
 kernel/crash_core.c                |   2 +-
 15 files changed, 391 insertions(+), 163 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a9e57f08bfa6..98722773391d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
 extern void apic_intr_mode_init(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c9459a4c3c68..22c5f3e6f820 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
 
 void __init sme_early_init(void);
 
-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
 void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
 
-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
 static inline void __init sme_enable(struct boot_params *bp) { }
 
 static inline bool sme_active(void) { return false; }
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 880441f24146..25ddf02598d2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
 	return APIC_SYMMETRIC_IO;
 }
 
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+	unsigned int value;
+
+	/*
+	 * Don't do the setup now if we have a SMP BIOS as the
+	 * through-I/O-APIC virtual wire mode might be active.
+	 */
+	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
+		return;
+
+	/*
+	 * Do not trust the local APIC being empty at bootup.
+	 */
+	clear_local_APIC();
+
+	/*
+	 * Enable APIC.
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+	/* This bit is reserved on P4/Xeon and should be cleared */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    (boot_cpu_data.x86 == 15))
+		value &= ~APIC_SPIV_FOCUS_DISABLED;
+	else
+#endif
+		value |= APIC_SPIV_FOCUS_DISABLED;
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write(APIC_SPIV, value);
+
+	/*
+	 * Set up the virtual wire mode.
+	 */
+	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+	value = APIC_DM_NMI;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	if (apic_extnmi == APIC_EXTNMI_NONE)
+		value |= APIC_LVT_MASKED;
+	apic_write(APIC_LVT1, value);
+}
+
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f8b03bb8e725..3cc471beb50b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 
 		err = assign_irq_vector_policy(irqd, info);
 		trace_vector_setup(virq + i, false, err);
-		if (err)
+		if (err) {
+			irqd->chip_data = NULL;
+			free_apic_chip_data(apicd);
 			goto error;
+		}
 	}
 
 	return 0;
 
 error:
-	x86_vector_free_irqs(domain, virq, i + 1);
+	x86_vector_free_irqs(domain, virq, i);
 	return err;
 }
 
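The error path above is the classic partial-failure unwind: when setup fails
at index i, only the i entries in [0, i) were fully initialized (the failing
slot is torn down in place), so exactly i entries may be freed afterwards,
not i + 1. A hypothetical, self-contained illustration of the pattern (all
names invented):

  #include <errno.h>
  #include <stdlib.h>

  struct thing { int payload; };

  static struct thing *alloc_thing(void) { return calloc(1, sizeof(struct thing)); }
  static void free_thing(struct thing *t) { free(t); }

  /* On failure, free exactly the [0, i) entries that were set up. */
  static int alloc_many(struct thing **out, unsigned int count)
  {
          unsigned int i;

          for (i = 0; i < count; i++) {
                  out[i] = alloc_thing();
                  if (!out[i])
                          goto error;     /* slot i was never allocated */
          }
          return 0;

  error:
          while (i--)                     /* unwinds i - 1 ... 0 */
                  free_thing(out[i]);
          return -ENOMEM;
  }
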
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 88dcf8479013..99442370de40 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		 */
 		if (static_branch_unlikely(&rdt_mon_enable_key))
 			rmdir_mondata_subdir_allrdtgrp(r, d->id);
-		kfree(d->ctrl_val);
-		kfree(d->rmid_busy_llc);
-		kfree(d->mbm_total);
-		kfree(d->mbm_local);
 		list_del(&d->list);
 		if (is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 			cancel_delayed_work(&d->cqm_limbo);
 		}
 
+		kfree(d->ctrl_val);
+		kfree(d->rmid_busy_llc);
+		kfree(d->mbm_total);
+		kfree(d->mbm_local);
 		kfree(d);
 		return;
 	}
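
The use-after-free fixed above was an ordering problem: the buffers were
freed while delayed work that dereferences them could still run, so the fix
frees them only after that work has been cancelled. A hypothetical
kernel-style illustration of the pattern (names invented; the sketch uses
the _sync variant for simplicity):

  struct domain {
          struct delayed_work work;       /* handler reads ->stats */
          u64 *stats;
  };

  static void domain_teardown(struct domain *d)
  {
          cancel_delayed_work_sync(&d->work);     /* handler can no longer run */
          kfree(d->stats);                        /* now safe to free */
          kfree(d);
  }
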
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a5d757b9cfd..7ba5d819ebe3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	p = fixup_pointer(&phys_base, physaddr);
 	*p += load_delta - sme_get_me_mask();
 
-	/* Encrypt the kernel (if SME is active) */
-	sme_encrypt_kernel();
+	/* Encrypt the kernel and related (if SME is active) */
+	sme_encrypt_kernel(bp);
 
 	/*
 	 * Return the SME encryption mask (if SME is active) to be used as a
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef3984f..56d99be3706a 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -56,7 +56,7 @@ struct idt_data {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
 	INTG(X86_TRAP_DB,		debug),
 	SYSG(X86_TRAP_BP,		int3),
 #ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
  * the traps which use them are reinitialized with IST after cpu_init() has
  * set up TSS.
  */
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
 	INTG(X86_TRAP_DE,		divide_error),
 	INTG(X86_TRAP_NMI,		nmi),
 	INTG(X86_TRAP_BR,		bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
 /*
  * The APIC and SMP idt entries
  */
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
 #ifdef CONFIG_SMP
 	INTG(RESCHEDULE_VECTOR,		reschedule_interrupt),
 	INTG(CALL_FUNCTION_VECTOR,	call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
 	INTG(X86_TRAP_PF,		page_fault),
 };
 
@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
  * Override for the debug_idt. Same as the default, but with interrupt
  * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
  */
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
 	INTG(X86_TRAP_DB,	debug),
 	INTG(X86_TRAP_BP,	int3),
 };
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
  * The exceptions which use Interrupt stacks. They are setup after
  * cpu_init() when the TSS has been initialized.
  */
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
 	ISTG(X86_TRAP_DB,	debug,		DEBUG_STACK),
 	ISTG(X86_TRAP_NMI,	nmi,		NMI_STACK),
 	SISTG(X86_TRAP_BP,	int3,		DEBUG_STACK),
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 8da3e909e967..a539410c4ea9 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
 	struct irq_chip *chip = legacy_pic->chip;
 	int i;
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	init_bsp_APIC();
+#endif
 	legacy_pic->init(0);
 
 	for (i = 0; i < nr_legacy_irqs(); i++)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 145810b0edf6..68d7ab81c62f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
 	    !ramdisk_image || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
 
-	/*
-	 * If SME is active, this memory will be marked encrypted by the
-	 * kernel when it is accessed (including relocation). However, the
-	 * ramdisk image was loaded decrypted by the bootloader, so make
-	 * sure that it is encrypted before accessing it. For SEV the
-	 * ramdisk will already be encrypted, so only do this for SME.
-	 */
-	if (sme_active())
-		sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
-
 	initrd_start = 0;
 
 	mapped_size = memblock_mem_size(max_pfn_mapped);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8ea117f8142e..e169e85db434 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
 		case INTEL_FAM6_KABYLAKE_DESKTOP:
 			crystal_khz = 24000;	/* 24.0 MHz */
 			break;
-		case INTEL_FAM6_SKYLAKE_X:
 		case INTEL_FAM6_ATOM_DENVERTON:
 			crystal_khz = 25000;	/* 25.0 MHz */
 			break;
@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
 		}
 	}
 
+	if (crystal_khz == 0)
+		return 0;
 	/*
 	 * TSC frequency determined by CPUID is a "hardware reported"
 	 * frequency and is the most accurate one so far we have. This
@@ -1315,6 +1316,12 @@ void __init tsc_init(void)
 		(unsigned long)cpu_khz / 1000,
 		(unsigned long)cpu_khz % 1000);
 
+	if (cpu_khz != tsc_khz) {
+		pr_info("Detected %lu.%03lu MHz TSC",
+			(unsigned long)tsc_khz / 1000,
+			(unsigned long)tsc_khz % 1000);
+	}
+
 	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
 	tsc_store_and_check_tsc_adjust(true);
 
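For reference, native_calibrate_tsc() builds on CPUID leaf 0x15: TSC Hz =
crystal Hz * EBX / EAX, with the crystal frequency in ECX when the CPU
enumerates it (and from a known-model table otherwise). A userspace sketch
that, like the fix above, gives up rather than guess when the crystal
frequency is unknown:

  #include <cpuid.h>
  #include <stdio.h>

  static unsigned long long tsc_hz_from_cpuid(void)
  {
          unsigned int denom, numer, crystal_hz, edx;

          if (!__get_cpuid_count(0x15, 0, &denom, &numer, &crystal_hz, &edx))
                  return 0;
          if (!denom || !numer || !crystal_hz)
                  return 0;       /* unknown crystal: don't guess */

          return (unsigned long long)crystal_hz * numer / denom;
  }

  int main(void)
  {
          printf("TSC: %llu Hz\n", tsc_hz_from_cpuid());
          return 0;
  }
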
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 06fe3d51d385..b3e40773dce0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
  *	     faulted on a pte with its pkey=4.
  */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
+		u32 *pkey)
 {
 	/* This is effectively an #ifdef */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return;
 
 	/* Fault not from Protection Keys: nothing to do */
-	if (si_code != SEGV_PKUERR)
+	if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
 		return;
 	/*
 	 * force_sig_info_fault() is called from a number of
@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		lsb = PAGE_SHIFT;
 	info.si_addr_lsb = lsb;
 
-	fill_sig_info_pkey(si_code, &info, pkey);
+	fill_sig_info_pkey(si_signo, si_code, &info, pkey);
 
 	force_sig_info(si_signo, &info, tsk);
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 391b13402e40..3ef362f598e3 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
 	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
 }
 
-static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
-				 unsigned long end)
+struct sme_populate_pgd_data {
+	void	*pgtable_area;
+	pgd_t	*pgd;
+
+	pmdval_t pmd_flags;
+	pteval_t pte_flags;
+	unsigned long paddr;
+
+	unsigned long vaddr;
+	unsigned long vaddr_end;
+};
+
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
 {
 	unsigned long pgd_start, pgd_end, pgd_size;
 	pgd_t *pgd_p;
 
-	pgd_start = start & PGDIR_MASK;
-	pgd_end = end & PGDIR_MASK;
+	pgd_start = ppd->vaddr & PGDIR_MASK;
+	pgd_end = ppd->vaddr_end & PGDIR_MASK;
 
-	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
-	pgd_size *= sizeof(pgd_t);
+	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
 
-	pgd_p = pgd_base + pgd_index(start);
+	pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
 
 	memset(pgd_p, 0, pgd_size);
 }
 
-#define PGD_FLAGS	_KERNPG_TABLE_NOENC
-#define P4D_FLAGS	_KERNPG_TABLE_NOENC
-#define PUD_FLAGS	_KERNPG_TABLE_NOENC
-#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+#define PGD_FLAGS		_KERNPG_TABLE_NOENC
+#define P4D_FLAGS		_KERNPG_TABLE_NOENC
+#define PUD_FLAGS		_KERNPG_TABLE_NOENC
+#define PMD_FLAGS		_KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE		(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC		PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP	((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+				 (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC		(PMD_FLAGS_LARGE | _PAGE_ENC)
+
+#define PTE_FLAGS		(__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC		PTE_FLAGS
+#define PTE_FLAGS_DEC_WP	((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+				 (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC		(PTE_FLAGS | _PAGE_ENC)
 
-static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
-				     unsigned long vaddr, pmdval_t pmd_val)
+static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
 {
 	pgd_t *pgd_p;
 	p4d_t *p4d_p;
 	pud_t *pud_p;
 	pmd_t *pmd_p;
 
-	pgd_p = pgd_base + pgd_index(vaddr);
+	pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
 	if (native_pgd_val(*pgd_p)) {
 		if (IS_ENABLED(CONFIG_X86_5LEVEL))
 			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
 		pgd_t pgd;
 
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			p4d_p = pgtable_area;
+			p4d_p = ppd->pgtable_area;
 			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
-			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+			ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
 
 			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
 		} else {
-			pud_p = pgtable_area;
+			pud_p = ppd->pgtable_area;
 			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+			ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
 
 			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
 		}
@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
 	}
 
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-		p4d_p += p4d_index(vaddr);
+		p4d_p += p4d_index(ppd->vaddr);
 		if (native_p4d_val(*p4d_p)) {
 			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
 		} else {
 			p4d_t p4d;
 
-			pud_p = pgtable_area;
+			pud_p = ppd->pgtable_area;
 			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+			ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
 
 			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
 			native_set_p4d(p4d_p, p4d);
 		}
 	}
 
-	pud_p += pud_index(vaddr);
+	pud_p += pud_index(ppd->vaddr);
 	if (native_pud_val(*pud_p)) {
 		if (native_pud_val(*pud_p) & _PAGE_PSE)
-			goto out;
+			return NULL;
 
 		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
 	} else {
 		pud_t pud;
 
-		pmd_p = pgtable_area;
+		pmd_p = ppd->pgtable_area;
 		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
-		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+		ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
 
 		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
 		native_set_pud(pud_p, pud);
 	}
 
-	pmd_p += pmd_index(vaddr);
+	return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+	pmd_t *pmd_p;
+
+	pmd_p = sme_prepare_pgd(ppd);
+	if (!pmd_p)
+		return;
+
+	pmd_p += pmd_index(ppd->vaddr);
 	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
-		native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+		native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
+}
 
-out:
-	return pgtable_area;
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+{
+	pmd_t *pmd_p;
+	pte_t *pte_p;
+
+	pmd_p = sme_prepare_pgd(ppd);
+	if (!pmd_p)
+		return;
+
+	pmd_p += pmd_index(ppd->vaddr);
+	if (native_pmd_val(*pmd_p)) {
+		if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+			return;
+
+		pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+	} else {
+		pmd_t pmd;
+
+		pte_p = ppd->pgtable_area;
+		memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+		ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+		pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+		native_set_pmd(pmd_p, pmd);
+	}
+
+	pte_p += pte_index(ppd->vaddr);
+	if (!native_pte_val(*pte_p))
+		native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
+	while (ppd->vaddr < ppd->vaddr_end) {
+		sme_populate_pgd_large(ppd);
+
+		ppd->vaddr += PMD_PAGE_SIZE;
+		ppd->paddr += PMD_PAGE_SIZE;
+	}
+}
+
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+	while (ppd->vaddr < ppd->vaddr_end) {
+		sme_populate_pgd(ppd);
+
+		ppd->vaddr += PAGE_SIZE;
+		ppd->paddr += PAGE_SIZE;
+	}
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+				   pmdval_t pmd_flags, pteval_t pte_flags)
+{
+	unsigned long vaddr_end;
+
+	ppd->pmd_flags = pmd_flags;
+	ppd->pte_flags = pte_flags;
+
+	/* Save original end value since we modify the struct value */
+	vaddr_end = ppd->vaddr_end;
+
+	/* If start is not 2MB aligned, create PTE entries */
+	ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+	__sme_map_range_pte(ppd);
+
+	/* Create PMD entries */
+	ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+	__sme_map_range_pmd(ppd);
+
+	/* If end is not 2MB aligned, create PTE entries */
+	ppd->vaddr_end = vaddr_end;
+	__sme_map_range_pte(ppd);
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
 }
 
 static unsigned long __init sme_pgtable_calc(unsigned long len)
 {
-	unsigned long p4d_size, pud_size, pmd_size;
+	unsigned long p4d_size, pud_size, pmd_size, pte_size;
 	unsigned long total;
 
 	/*
 	 * Perform a relatively simplistic calculation of the pagetable
-	 * entries that are needed. That mappings will be covered by 2MB
-	 * PMD entries so we can conservatively calculate the required
+	 * entries that are needed. Those mappings will be covered mostly
+	 * by 2MB PMD entries so we can conservatively calculate the required
 	 * number of P4D, PUD and PMD structures needed to perform the
-	 * mappings. Incrementing the count for each covers the case where
-	 * the addresses cross entries.
+	 * mappings.  For mappings that are not 2MB aligned, PTE mappings
+	 * would be needed for the start and end portion of the address range
+	 * that fall outside of the 2MB alignment.  This results in, at most,
+	 * two extra pages to hold PTE entries for each range that is mapped.
+	 * Incrementing the count for each covers the case where the addresses
+	 * cross entries.
 	 */
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
 		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
 	}
 	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
 	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+	pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
 
-	total = p4d_size + pud_size + pmd_size;
+	total = p4d_size + pud_size + pmd_size + pte_size;
 
 	/*
 	 * Now calculate the added pagetable structures needed to populate
@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
 	return total;
 }
 
-void __init sme_encrypt_kernel(void)
+void __init sme_encrypt_kernel(struct boot_params *bp)
 {
 	unsigned long workarea_start, workarea_end, workarea_len;
 	unsigned long execute_start, execute_end, execute_len;
 	unsigned long kernel_start, kernel_end, kernel_len;
+	unsigned long initrd_start, initrd_end, initrd_len;
+	struct sme_populate_pgd_data ppd;
 	unsigned long pgtable_area_len;
-	unsigned long paddr, pmd_flags;
 	unsigned long decrypted_base;
-	void *pgtable_area;
-	pgd_t *pgd;
 
 	if (!sme_active())
 		return;
 
 	/*
-	 * Prepare for encrypting the kernel by building new pagetables with
-	 * the necessary attributes needed to encrypt the kernel in place.
+	 * Prepare for encrypting the kernel and initrd by building new
+	 * pagetables with the necessary attributes needed to encrypt the
+	 * kernel in place.
 	 *
 	 *   One range of virtual addresses will map the memory occupied
-	 *   by the kernel as encrypted.
+	 *   by the kernel and initrd as encrypted.
 	 *
 	 *   Another range of virtual addresses will map the memory occupied
-	 *   by the kernel as decrypted and write-protected.
+	 *   by the kernel and initrd as decrypted and write-protected.
 	 *
 	 *     The use of write-protect attribute will prevent any of the
 	 *     memory from being cached.
@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void)
 	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
 	kernel_len = kernel_end - kernel_start;
 
+	initrd_start = 0;
+	initrd_end = 0;
+	initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+	initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+		     ((unsigned long)bp->ext_ramdisk_size << 32);
+	if (initrd_len) {
+		initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+			       ((unsigned long)bp->ext_ramdisk_image << 32);
+		initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+		initrd_len = initrd_end - initrd_start;
+	}
+#endif
+
 	/* Set the encryption workarea to be immediately after the kernel */
 	workarea_start = kernel_end;
 
@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void)
 	 */
 	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
 	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+	if (initrd_len)
+		pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
 
 	/* PUDs and PMDs needed in the current pagetables for the workarea */
 	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
 
 	/*
 	 * The total workarea includes the executable encryption area and
-	 * the pagetable area.
+	 * the pagetable area. The start of the workarea is already 2MB
+	 * aligned, align the end of the workarea on a 2MB boundary so that
+	 * we don't try to create/allocate PTE entries from the workarea
+	 * before it is mapped.
 	 */
 	workarea_len = execute_len + pgtable_area_len;
-	workarea_end = workarea_start + workarea_len;
+	workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
 
 	/*
 	 * Set the address to the start of where newly created pagetable
@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void)
 	 * pagetables and when the new encrypted and decrypted kernel
 	 * mappings are populated.
 	 */
-	pgtable_area = (void *)execute_end;
+	ppd.pgtable_area = (void *)execute_end;
 
 	/*
 	 * Make sure the current pagetable structure has entries for
 	 * addressing the workarea.
 	 */
-	pgd = (pgd_t *)native_read_cr3_pa();
-	paddr = workarea_start;
-	while (paddr < workarea_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + PMD_FLAGS);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.pgd = (pgd_t *)native_read_cr3_pa();
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start;
+	ppd.vaddr_end = workarea_end;
+	sme_map_range_decrypted(&ppd);
 
 	/* Flush the TLB - no globals so cr3 is enough */
 	native_write_cr3(__native_read_cr3());
 
 	/*
 	 * A new pagetable structure is being built to allow for the kernel
-	 * to be encrypted. It starts with an empty PGD that will then be
-	 * populated with new PUDs and PMDs as the encrypted and decrypted
-	 * kernel mappings are created.
+	 * and initrd to be encrypted. It starts with an empty PGD that will
+	 * then be populated with new PUDs and PMDs as the encrypted and
+	 * decrypted kernel mappings are created.
 	 */
-	pgd = pgtable_area;
-	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
-	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
-
-	/* Add encrypted kernel (identity) mappings */
-	pmd_flags = PMD_FLAGS | _PAGE_ENC;
-	paddr = kernel_start;
-	while (paddr < kernel_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + pmd_flags);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.pgd = ppd.pgtable_area;
+	memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+	ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
 
 	/*
 	 * A different PGD index/entry must be used to get different
@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void)
 	 * the base of the mapping.
 	 */
 	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+	if (initrd_len) {
+		unsigned long check_base;
+
+		check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+		decrypted_base = max(decrypted_base, check_base);
+	}
 	decrypted_base <<= PGDIR_SHIFT;
 
+	/* Add encrypted kernel (identity) mappings */
+	ppd.paddr = kernel_start;
+	ppd.vaddr = kernel_start;
+	ppd.vaddr_end = kernel_end;
+	sme_map_range_encrypted(&ppd);
+
 	/* Add decrypted, write-protected kernel (non-identity) mappings */
-	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
-	paddr = kernel_start;
-	while (paddr < kernel_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr + decrypted_base,
-						paddr + pmd_flags);
-
-		paddr += PMD_PAGE_SIZE;
+	ppd.paddr = kernel_start;
+	ppd.vaddr = kernel_start + decrypted_base;
+	ppd.vaddr_end = kernel_end + decrypted_base;
+	sme_map_range_decrypted_wp(&ppd);
+
+	if (initrd_len) {
+		/* Add encrypted initrd (identity) mappings */
+		ppd.paddr = initrd_start;
+		ppd.vaddr = initrd_start;
+		ppd.vaddr_end = initrd_end;
+		sme_map_range_encrypted(&ppd);
+		/*
+		 * Add decrypted, write-protected initrd (non-identity) mappings
+		 */
+		ppd.paddr = initrd_start;
+		ppd.vaddr = initrd_start + decrypted_base;
+		ppd.vaddr_end = initrd_end + decrypted_base;
+		sme_map_range_decrypted_wp(&ppd);
 	}
 
 	/* Add decrypted workarea mappings to both kernel mappings */
-	paddr = workarea_start;
-	while (paddr < workarea_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + PMD_FLAGS);
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start;
+	ppd.vaddr_end = workarea_end;
+	sme_map_range_decrypted(&ppd);
 
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr + decrypted_base,
-						paddr + PMD_FLAGS);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start + decrypted_base;
+	ppd.vaddr_end = workarea_end + decrypted_base;
+	sme_map_range_decrypted(&ppd);
 
 	/* Perform the encryption */
 	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
-			    kernel_len, workarea_start, (unsigned long)pgd);
+			    kernel_len, workarea_start, (unsigned long)ppd.pgd);
+
+	if (initrd_len)
+		sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+				    initrd_len, workarea_start,
+				    (unsigned long)ppd.pgd);
 
 	/*
 	 * At this point we are running encrypted.  Remove the mappings for
 	 * the decrypted areas - all that is needed for this is to remove
 	 * the PGD entry/entries.
 	 */
-	sme_clear_pgd(pgd, kernel_start + decrypted_base,
-		      kernel_end + decrypted_base);
+	ppd.vaddr = kernel_start + decrypted_base;
+	ppd.vaddr_end = kernel_end + decrypted_base;
+	sme_clear_pgd(&ppd);
+
+	if (initrd_len) {
+		ppd.vaddr = initrd_start + decrypted_base;
+		ppd.vaddr_end = initrd_end + decrypted_base;
+		sme_clear_pgd(&ppd);
+	}
 
-	sme_clear_pgd(pgd, workarea_start + decrypted_base,
-		      workarea_end + decrypted_base);
+	ppd.vaddr = workarea_start + decrypted_base;
+	ppd.vaddr_end = workarea_end + decrypted_base;
+	sme_clear_pgd(&ppd);
 
 	/* Flush the TLB - no globals so cr3 is enough */
 	native_write_cr3(__native_read_cr3());
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 730e6d541df1..01f682cf77a8 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
 
 	/*
 	 * Entry parameters:
-	 *   RDI - virtual address for the encrypted kernel mapping
-	 *   RSI - virtual address for the decrypted kernel mapping
-	 *   RDX - length of kernel
+	 *   RDI - virtual address for the encrypted mapping
+	 *   RSI - virtual address for the decrypted mapping
+	 *   RDX - length to encrypt
 	 *   RCX - virtual address of the encryption workarea, including:
 	 *     - stack page (PAGE_SIZE)
 	 *     - encryption routine page (PAGE_SIZE)
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
 	addq	$PAGE_SIZE, %rax	/* Workarea encryption routine */
 
 	push	%r12
-	movq	%rdi, %r10		/* Encrypted kernel */
-	movq	%rsi, %r11		/* Decrypted kernel */
-	movq	%rdx, %r12		/* Kernel length */
+	movq	%rdi, %r10		/* Encrypted area */
+	movq	%rsi, %r11		/* Decrypted area */
+	movq	%rdx, %r12		/* Area length */
 
 	/* Copy encryption routine into the workarea */
 	movq	%rax, %rdi				/* Workarea encryption routine */
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
 	rep	movsb
 
 	/* Setup registers for call */
-	movq	%r10, %rdi		/* Encrypted kernel */
-	movq	%r11, %rsi		/* Decrypted kernel */
+	movq	%r10, %rdi		/* Encrypted area */
+	movq	%r11, %rsi		/* Decrypted area */
 	movq	%r8, %rdx		/* Pagetables used for encryption */
-	movq	%r12, %rcx		/* Kernel length */
+	movq	%r12, %rcx		/* Area length */
 	movq	%rax, %r8		/* Workarea encryption routine */
 	addq	$PAGE_SIZE, %r8		/* Workarea intermediate copy buffer */
 
@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
 
 ENTRY(__enc_copy)
 /*
- * Routine used to encrypt kernel.
+ * Routine used to encrypt memory in place.
  *   This routine must be run outside of the kernel proper since
  *   the kernel will be encrypted during the process. So this
  *   routine is defined here and then copied to an area outside
@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
  *   during execution.
  *
  *   On entry the registers must be:
- *     RDI - virtual address for the encrypted kernel mapping
- *     RSI - virtual address for the decrypted kernel mapping
+ *     RDI - virtual address for the encrypted mapping
+ *     RSI - virtual address for the decrypted mapping
  *     RDX - address of the pagetables to use for encryption
- *     RCX - length of kernel
+ *     RCX - length of area
  *      R8 - intermediate copy buffer
  *
  *     RAX - points to this routine
  *
- * The kernel will be encrypted by copying from the non-encrypted
- * kernel space to an intermediate buffer and then copying from the
- * intermediate buffer back to the encrypted kernel space. The physical
- * addresses of the two kernel space mappings are the same which
- * results in the kernel being encrypted "in place".
+ * The area will be encrypted by copying from the non-encrypted
+ * memory space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted memory space. The physical
+ * addresses of the two mappings are the same which results in the area
+ * being encrypted "in place".
  */
 	/* Enable the new page tables */
 	mov	%rdx, %cr3
@@ -103,47 +103,55 @@ ENTRY(__enc_copy)
 	orq	$X86_CR4_PGE, %rdx
 	mov	%rdx, %cr4
 
+	push	%r15
+	push	%r12
+
+	movq	%rcx, %r9		/* Save area length */
+	movq	%rdi, %r10		/* Save encrypted area address */
+	movq	%rsi, %r11		/* Save decrypted area address */
+
 	/* Set the PAT register PA5 entry to write-protect */
-	push	%rcx
 	movl	$MSR_IA32_CR_PAT, %ecx
 	rdmsr
-	push	%rdx			/* Save original PAT value */
+	mov	%rdx, %r15		/* Save original PAT value */
 	andl	$0xffff00ff, %edx	/* Clear PA5 */
 	orl	$0x00000500, %edx	/* Set PA5 to WP */
 	wrmsr
-	pop	%rdx			/* RDX contains original PAT value */
-	pop	%rcx
-
-	movq	%rcx, %r9		/* Save kernel length */
-	movq	%rdi, %r10		/* Save encrypted kernel address */
-	movq	%rsi, %r11		/* Save decrypted kernel address */
 
 	wbinvd				/* Invalidate any cache entries */
 
-	/* Copy/encrypt 2MB at a time */
+	/* Copy/encrypt up to 2MB at a time */
+	movq	$PMD_PAGE_SIZE, %r12
 1:
-	movq	%r11, %rsi		/* Source - decrypted kernel */
+	cmpq	%r12, %r9
+	jnb	2f
+	movq	%r9, %r12
+
+2:
+	movq	%r11, %rsi		/* Source - decrypted area */
 	movq	%r8, %rdi		/* Dest   - intermediate copy buffer */
-	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	movq	%r12, %rcx
 	rep	movsb
 
 	movq	%r8, %rsi		/* Source - intermediate copy buffer */
-	movq	%r10, %rdi		/* Dest   - encrypted kernel */
-	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	movq	%r10, %rdi		/* Dest   - encrypted area */
+	movq	%r12, %rcx
 	rep	movsb
 
-	addq	$PMD_PAGE_SIZE, %r11
-	addq	$PMD_PAGE_SIZE, %r10
-	subq	$PMD_PAGE_SIZE, %r9	/* Kernel length decrement */
+	addq	%r12, %r11
+	addq	%r12, %r10
+	subq	%r12, %r9		/* Kernel length decrement */
 	jnz	1b			/* Kernel length not zero? */
 
 	/* Restore PAT register */
-	push	%rdx			/* Save original PAT value */
 	movl	$MSR_IA32_CR_PAT, %ecx
 	rdmsr
-	pop	%rdx			/* Restore original PAT value */
+	mov	%r15, %rdx		/* Restore original PAT value */
 	wrmsr
 
+	pop	%r12
+	pop	%r15
+
 	ret
 .L__enc_copy_end:
 ENDPROC(__enc_copy)
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 06097ef30449..b511f6d24b42 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 	vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
 #define VMCOREINFO_SYMBOL(name) \
 	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
 #define VMCOREINFO_SIZE(name) \
 	vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
 			      (unsigned long)sizeof(name))
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278e..4f63597c824d 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_SYMBOL(contig_page_data);
 #endif
 #ifdef CONFIG_SPARSEMEM
-	VMCOREINFO_SYMBOL(mem_section);
+	VMCOREINFO_SYMBOL_ARRAY(mem_section);
 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
 	VMCOREINFO_STRUCT_SIZE(mem_section);
 	VMCOREINFO_OFFSET(mem_section, section_mem_map);

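The kdump fix is easiest to see in miniature: with CONFIG_SPARSEMEM_EXTREME,
mem_section is now a dynamically allocated pointer, so recording &mem_section
gives the address of the pointer variable while makedumpfile wants the array
it points to. A hypothetical userspace illustration of the difference:

  #include <stdio.h>

  struct mem_section;
  static struct mem_section **mem_section;        /* SPARSEMEM_EXTREME shape */

  int main(void)
  {
          /* What VMCOREINFO_SYMBOL records: where the pointer lives. */
          printf("SYMBOL:       %lx\n", (unsigned long)&mem_section);
          /* What VMCOREINFO_SYMBOL_ARRAY records: what it points to
           * (NULL here; in the kernel, the allocated root array). */
          printf("SYMBOL_ARRAY: %lx\n", (unsigned long)mem_section);
          return 0;
  }
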
* [GIT PULL] x86 fixes
@ 2018-01-12 13:56 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2018-01-12 13:56 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 9d0513d82f1a8fe17b41f113ac5922fa57dbaf5c x86/platform/intel-mid: Revert "Make 'bt_sfi_data' const"

There are two pending (non-PTI) x86 fixes:

 - an Intel-MID crash fix
 - and an Intel microcode loader blacklist quirk to avoid a problematic revision.
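
The quirk keys off Broadwell-X (family 6, model 79, stepping 1) running a
microcode revision below 0x0b000021 (Intel erratum BDF90, #334165). A
hypothetical userspace check against the same threshold (the sysfs path is
the standard microcode interface; the constant comes from the patch below):

  #include <stdio.h>

  int main(void)
  {
          FILE *f = fopen("/sys/devices/system/cpu/cpu0/microcode/version", "r");
          unsigned long rev;

          if (!f)
                  return 1;
          if (fscanf(f, "%lx", &rev) != 1) {
                  fclose(f);
                  return 1;
          }
          fclose(f);

          /* Revisions below the BDF90 threshold get late loading refused. */
          printf("revision 0x%lx: late loading %s\n", rev,
                 rev < 0x0b000021 ? "refused" : "allowed");
          return 0;
  }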

 Thanks,

	Ingo

------------------>
Andy Shevchenko (1):
      x86/platform/intel-mid: Revert "Make 'bt_sfi_data' const"

Jia Zhang (1):
      x86/microcode/intel: Extend BDW late-loading with a revision check


 arch/x86/kernel/cpu/microcode/intel.c                 | 13 +++++++++++--
 arch/x86/platform/intel-mid/device_libs/platform_bt.c |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8ccdca6d3f9e..d9e460fc7a3b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-		pr_err_once("late loading on model 79 is disabled.\n");
+	/*
+	 * Late loading on model 79 with microcode revision less than 0x0b000021
+	 * may result in a system hang. This behavior is documented in item
+	 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+	 */
+	if (c->x86 == 6 &&
+	    c->x86_model == INTEL_FAM6_BROADWELL_X &&
+	    c->x86_mask == 0x01 &&
+	    c->microcode < 0x0b000021) {
+		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
 		return true;
 	}
 
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index dc036e511f48..5a0483e7bf66 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
 	return 0;
 }
 
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
 	.setup	= tng_bt_sfi_setup,
 };
 

* [GIT PULL] x86 fixes
@ 2017-12-15 15:43 Ingo Molnar
  2017-12-15 15:50 ` Andy Lutomirski
  0 siblings, 1 reply; 505+ messages in thread
From: Ingo Molnar @ 2017-12-15 15:43 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton, Josh Poimboeuf, Andy Lutomirski, Borislav Petkov

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 215eada73e77ede7e15531d99f712481ddd429be objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version

Misc fixes:

 - Fix the s2ram regression caused by confusion around segment register 
   restoration, plus related cleanups that make the code more robust

 - a guess-unwinder Kconfig dependency fix

 - an isoimage build target fix for certain tool chain combinations

 - instruction decoder opcode map fixes+updates, and the syncing of the kernel 
   decoder headers to the objtool headers

 - a kmmio tracing fix

 - two fixes related to 5-level paging (see the LA57 check sketched right after this list)

 - a topology enumeration fix on certain SMP systems

 Thanks,

	Ingo

------------------>
Andrey Ryabinin (1):
      x86/unwinder/guess: Prevent using CONFIG_UNWINDER_GUESS=y with CONFIG_STACKDEPOT=y

Andy Lutomirski (3):
      x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
      x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
      x86/power: Make restore_processor_context() sane

Changbin Du (1):
      x86/build: Don't verify mtools configuration file for isoimage

Ingo Molnar (1):
      objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version

Karol Herbst (1):
      x86/mm/kmmio: Fix mmiotrace for page unaligned addresses

Kirill A. Shutemov (2):
      x86/boot/compressed/64: Detect and handle 5-level paging at boot-time
      x86/boot/compressed/64: Print error if 5-level paging is not supported

Prarit Bhargava (1):
      x86/smpboot: Do not use smp_num_siblings in __max_logical_packages calculation

Randy Dunlap (1):
      x86/decoder: Fix and update the opcodes map


 arch/x86/Kconfig.debug                             |  1 +
 arch/x86/boot/compressed/Makefile                  |  1 +
 arch/x86/boot/compressed/head_64.S                 | 16 +++-
 arch/x86/boot/compressed/misc.c                    | 16 ++++
 arch/x86/boot/compressed/pgtable_64.c              | 28 ++++++
 arch/x86/boot/genimage.sh                          |  4 +-
 arch/x86/include/asm/suspend_32.h                  |  8 +-
 arch/x86/include/asm/suspend_64.h                  | 19 ++++-
 arch/x86/kernel/smpboot.c                          |  4 +-
 arch/x86/lib/x86-opcode-map.txt                    | 13 ++-
 arch/x86/mm/ioremap.c                              |  4 +-
 arch/x86/mm/kmmio.c                                | 12 +--
 arch/x86/power/cpu.c                               | 99 ++++++++++------------
 tools/objtool/arch/x86/insn/inat.h                 | 10 +++
 tools/objtool/arch/x86/insn/x86-opcode-map.txt     | 15 +++-
 .../perf/util/intel-pt-decoder/x86-opcode-map.txt  | 15 +++-
 16 files changed, 184 insertions(+), 81 deletions(-)
 create mode 100644 arch/x86/boot/compressed/pgtable_64.c

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a8768a91..672441c008c7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
 config UNWINDER_GUESS
 	bool "Guess unwinder"
 	depends on EXPERT
+	depends on !STACKDEPOT
 	---help---
 	  This option enables the "guess" unwinder for unwinding kernel stack
 	  traces.  It scans the stack and reports every kernel text address it
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1e9c322e973a..f25e1530e064 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
 ifdef CONFIG_X86_64
 	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
 	vmlinux-objs-y += $(obj)/mem_encrypt.o
+	vmlinux-objs-y += $(obj)/pgtable_64.o
 endif
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f3133..fc313e29fe2c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,10 +305,18 @@ ENTRY(startup_64)
 	leaq	boot_stack_end(%rbx), %rsp
 
 #ifdef CONFIG_X86_5LEVEL
-	/* Check if 5-level paging has already enabled */
-	movq	%cr4, %rax
-	testl	$X86_CR4_LA57, %eax
-	jnz	lvl5
+	/*
+	 * Check if we need to enable 5-level paging.
+	 * RSI holds real mode data and need to be preserved across
+	 * a function call.
+	 */
+	pushq	%rsi
+	call	l5_paging_required
+	popq	%rsi
+
+	/* If l5_paging_required() returned zero, we're done here. */
+	cmpq	$0, %rax
+	je	lvl5
 
 	/*
 	 * At this point we are in long mode with 4-level paging enabled,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25..98761a1576ce 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
 	}
 }
 
+static bool l5_supported(void)
+{
+	/* Check if leaf 7 is supported. */
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
 #if CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len,
 			       unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	console_init();
 	debug_putstr("early console in extract_kernel\n");
 
+	if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+		error("This linux kernel as configured requires 5-level paging\n"
+			"This CPU does not support the required 'cr4.la57' feature\n"
+			"Unable to boot - please use a kernel appropriate for your CPU\n");
+	}
+
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000000..b4469a37e9a1
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+	/* Check if leaf 7 is supported. */
+
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+		return 0;
+
+	/* Check if 5-level paging has already been enabled. */
+	if (native_read_cr4() & X86_CR4_LA57)
+		return 0;
+
+	return 1;
+}
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 49f4970f693b..c9e8499fbfe7 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -44,9 +44,9 @@ FDINITRD=$6
 
 # Make sure the files actually exist
 verify "$FBZIMAGE"
-verify "$MTOOLSRC"
 
 genbzdisk() {
+	verify "$MTOOLSRC"
 	mformat a:
 	syslinux $FIMAGE
 	echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
 }
 
 genfdimage144() {
+	verify "$MTOOLSRC"
 	dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
 	mformat v:
 	syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
 }
 
 genfdimage288() {
+	verify "$MTOOLSRC"
 	dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
 	mformat w:
 	syslinux $FIMAGE
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad33..8be6afb58471 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
 
 /* image of the saved processor state */
 struct saved_context {
-	u16 es, fs, gs, ss;
+	/*
+	 * On x86_32, all segment registers, with the possible exception of
+	 * gs, are saved at kernel entry in pt_regs.
+	 */
+#ifdef CONFIG_X86_32_LAZY_GS
+	u16 gs;
+#endif
 	unsigned long cr0, cr2, cr3, cr4;
 	u64 misc_enable;
 	bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee..a7af9f53c0cb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
  */
 struct saved_context {
 	struct pt_regs regs;
-	u16 ds, es, fs, gs, ss;
-	unsigned long gs_base, gs_kernel_base, fs_base;
+
+	/*
+	 * User CS and SS are saved in current_pt_regs().  The rest of the
+	 * segment selectors need to be saved and restored here.
+	 */
+	u16 ds, es, fs, gs;
+
+	/*
+	 * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+	 * so we save them separately.  We save the kernelmode GSBASE to
+	 * restore percpu access after resume.
+	 */
+	unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
 	unsigned long cr0, cr2, cr3, cr4, cr8;
 	u64 misc_enable;
 	bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
 	u16 gdt_pad; /* Unused */
 	struct desc_ptr gdt_desc;
 	u16 idt_pad;
-	u16 idt_limit;
-	unsigned long idt_base;
+	struct desc_ptr idt;
 	u16 ldt;
 	u16 tss;
 	unsigned long tr;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 05a97d5fe298..35cb20994e32 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
 static unsigned int logical_packages __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
-int __max_smt_threads __read_mostly;
+int __read_mostly __max_smt_threads = 1;
 
 /* Flag to indicate if a complete sched domain rebuild is required */
 bool x86_topology_update;
@@ -1304,7 +1304,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	 * Today neither Intel nor AMD support heterogenous systems so
 	 * extrapolate the boot cpu's data to all packages.
 	 */
-	ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+	ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
 	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
 	pr_info("Max logical packages: %u\n", __max_logical_packages);
 
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index c4d55919fac1..e0b85930dd77 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6e4573b1da34..c45b6ec5357b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
 		return;
 	}
 
+	mmiotrace_iounmap(addr);
+
 	addr = (volatile void __iomem *)
 		(PAGE_MASK & (unsigned long __force)addr);
 
-	mmiotrace_iounmap(addr);
-
 	/* Use the vm area unlocked, assuming the caller
 	   ensures there isn't another iounmap for the same address
 	   in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index c21c2ed04612..58477ec3d66d 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	unsigned long flags;
 	int ret = 0;
 	unsigned long size = 0;
+	unsigned long addr = p->addr & PAGE_MASK;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	unsigned int l;
 	pte_t *pte;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
-	if (get_kmmio_probe(p->addr)) {
+	if (get_kmmio_probe(addr)) {
 		ret = -EEXIST;
 		goto out;
 	}
 
-	pte = lookup_address(p->addr, &l);
+	pte = lookup_address(addr, &l);
 	if (!pte) {
 		ret = -EINVAL;
 		goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	kmmio_count++;
 	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < size_lim) {
-		if (add_kmmio_fault_page(p->addr + size))
+		if (add_kmmio_fault_page(addr + size))
 			pr_err("Unable to set page fault.\n");
 		size += page_level_size(l);
 	}
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 {
 	unsigned long flags;
 	unsigned long size = 0;
+	unsigned long addr = p->addr & PAGE_MASK;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	struct kmmio_fault_page *release_list = NULL;
 	struct kmmio_delayed_release *drelease;
 	unsigned int l;
 	pte_t *pte;
 
-	pte = lookup_address(p->addr, &l);
+	pte = lookup_address(addr, &l);
 	if (!pte)
 		return;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (size < size_lim) {
-		release_kmmio_fault_page(p->addr + size, &release_list);
+		release_kmmio_fault_page(addr + size, &release_list);
 		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 5191de14f4df..36a28eddb435 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
 	/*
 	 * descriptor tables
 	 */
-#ifdef CONFIG_X86_32
 	store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
 	/*
 	 * We save it here, but restore it only in the hibernate case.
 	 * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
 	/*
 	 * segment registers
 	 */
-#ifdef CONFIG_X86_32
-	savesegment(es, ctxt->es);
-	savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
 	savesegment(gs, ctxt->gs);
-	savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
-	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
-	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
-	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
-	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+	savesegment(gs, ctxt->gs);
+	savesegment(fs, ctxt->fs);
+	savesegment(ds, ctxt->ds);
+	savesegment(es, ctxt->es);
 
 	rdmsrl(MSR_FS_BASE, ctxt->fs_base);
-	rdmsrl(MSR_GS_BASE, ctxt->gs_base);
-	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
 	mtrr_save_fixed_ranges(NULL);
 
 	rdmsrl(MSR_EFER, ctxt->efer);
@@ -178,6 +170,9 @@ static void fix_processor_context(void)
 	write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
 
 	syscall_init();				/* This sets MSR_*STAR and related */
+#else
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		enable_sep_cpu();
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_mm_ldt(current->active_mm);	/* This does lldt */
@@ -190,9 +185,12 @@ static void fix_processor_context(void)
 }
 
 /**
- *	__restore_processor_state - restore the contents of CPU registers saved
- *		by __save_processor_state()
- *	@ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ *                             by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
  */
 static void notrace __restore_processor_state(struct saved_context *ctxt)
 {
@@ -215,57 +213,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	write_cr2(ctxt->cr2);
 	write_cr0(ctxt->cr0);
 
+	/* Restore the IDT. */
+	load_idt(&ctxt->idt);
+
 	/*
-	 * now restore the descriptor tables to their proper values
-	 * ltr is done i fix_processor_context().
+	 * Just in case the asm code got us here with the SS, DS, or ES
+	 * out of sync with the GDT, update them.
 	 */
-#ifdef CONFIG_X86_32
-	load_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-#endif
+	loadsegment(ss, __KERNEL_DS);
+	loadsegment(ds, __USER_DS);
+	loadsegment(es, __USER_DS);
 
-#ifdef CONFIG_X86_64
 	/*
-	 * We need GSBASE restored before percpu access can work.
-	 * percpu access can happen in exception handlers or in complicated
-	 * helpers like load_gs_index().
+	 * Restore percpu access.  Percpu access can happen in exception
+	 * handlers or in complicated helpers like load_gs_index().
 	 */
-	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#ifdef CONFIG_X86_64
+	wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+#else
+	loadsegment(fs, __KERNEL_PERCPU);
+	loadsegment(gs, __KERNEL_STACK_CANARY);
 #endif
 
+	/* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
 	fix_processor_context();
 
 	/*
-	 * Restore segment registers.  This happens after restoring the GDT
-	 * and LDT, which happen in fix_processor_context().
+	 * Now that we have descriptor tables fully restored and working
+	 * exception handling, restore the usermode segments.
 	 */
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+	loadsegment(ds, ctxt->es);
 	loadsegment(es, ctxt->es);
 	loadsegment(fs, ctxt->fs);
-	loadsegment(gs, ctxt->gs);
-	loadsegment(ss, ctxt->ss);
-
-	/*
-	 * sysenter MSRs
-	 */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
-	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
-	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
 	load_gs_index(ctxt->gs);
-	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
 
 	/*
-	 * Restore FSBASE and user GSBASE after reloading the respective
-	 * segment selectors.
+	 * Restore FSBASE and GSBASE after restoring the selectors, since
+	 * restoring the selectors clobbers the bases.  Keep in mind
+	 * that MSR_KERNEL_GS_BASE is horribly misnamed.
 	 */
 	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+	loadsegment(gs, ctxt->gs);
 #endif
 
 	do_fpu_end();
diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h
index 125ecd2a300d..52dc8d911173 100644
--- a/tools/objtool/arch/x86/insn/inat.h
+++ b/tools/objtool/arch/x86/insn/inat.h
@@ -97,6 +97,16 @@
 #define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)
 #define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS)
 
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE	0
+#define INAT_SEG_REG_DEFAULT	1
+#define INAT_SEG_REG_CS		2
+#define INAT_SEG_REG_SS		3
+#define INAT_SEG_REG_DS		4
+#define INAT_SEG_REG_ES		5
+#define INAT_SEG_REG_FS		6
+#define INAT_SEG_REG_GS		7
+
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
 extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
index 12e377184ee4..e0b85930dd77 100644
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 12e377184ee4..e0b85930dd77 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
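
A note on the decoder entries above (kept in sync between the objtool and perf copies of the table): UD0 (0f ff) and the Grp10 UD1 encodings always raise #UD, but the decoder still needs table entries for them so it can compute instruction lengths while scanning code. A hypothetical trigger, assuming GNU inline asm:

	static void trigger_ud0(void)
	{
		/* 0F FF is UD0 and raises #UD when executed.  Without the
		 * "ff: UD0" entry the decoder would not know how long the
		 * instruction is and would desynchronize on what follows. */
		asm volatile(".byte 0x0f, 0xff");
	}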

* [GIT PULL] x86 fixes
@ 2017-12-06 22:36 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-12-06 22:36 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 88edb57d1e0b262e669c5cad36646dcf5a7f37f5 x86/vdso: Change time() prototype to match __vdso_time()

Misc fixes:

 - Make CR4 handling IRQ-safe, a bug that VMware guests ran into
 - Don't crash on early IRQs in Xen guests
 - Don't crash during secondary CPU bringup if #UD-assisted WARN()ings are triggered
 - Make X86_BUG_FXSAVE_LEAK optional on newer AMD CPUs that have the fix
 - Fix AMD Fam17h microcode loading
 - Fix broadcom_postcore_init() if ACPI is disabled
 - Fix resume regression in __restore_processor_context()
 - Fix Sparse warnings
 - Fix a GCC-8 warning

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/power: Fix some ordering bugs in __restore_processor_context()

Arnd Bergmann (1):
      x86/vdso: Change time() prototype to match __vdso_time()

Chunyu Hu (1):
      x86/idt: Load idt early in start_secondary

Colin Ian King (1):
      x86: Fix Sparse warnings about non-static functions

Juergen Gross (1):
      x86/xen: Support early interrupts in xen pv guests

Nadav Amit (2):
      x86/tlb: Refactor CR4 setting and shadow write
      x86/tlb: Disable interrupts when changing CR4

Rafael J. Wysocki (1):
      x86/PCI: Make broadcom_postcore_init() check acpi_disabled

Rudolf Marek (1):
      x86/cpufeatures: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD

Tom Lendacky (1):
      x86/microcode/AMD: Add support for fam17h microcode loading


 arch/x86/entry/vdso/vclock_gettime.c |  2 +-
 arch/x86/include/asm/cpufeatures.h   |  1 +
 arch/x86/include/asm/segment.h       | 12 ++++++++++++
 arch/x86/include/asm/tlbflush.h      | 35 ++++++++++++++++++----------------
 arch/x86/kernel/apic/vector.c        |  4 ++--
 arch/x86/kernel/cpu/amd.c            |  7 +++++--
 arch/x86/kernel/cpu/microcode/amd.c  |  4 ++++
 arch/x86/kernel/process.c            |  2 +-
 arch/x86/kernel/smpboot.c            |  2 +-
 arch/x86/mm/extable.c                |  4 +++-
 arch/x86/pci/broadcom_bus.c          |  2 +-
 arch/x86/platform/uv/uv_nmi.c        |  4 ++--
 arch/x86/power/cpu.c                 | 21 ++++++++++++++++----
 arch/x86/xen/enlighten_pv.c          | 37 +++++++++++++++++++++++-------------
 arch/x86/xen/xen-asm_64.S            | 14 ++++++++++++++
 15 files changed, 107 insertions(+), 44 deletions(-)

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 11b13c4b43d5..f19856d95c60 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t)
 		*t = result;
 	return result;
 }
-int time(time_t *t)
+time_t time(time_t *t)
 	__attribute__((weak, alias("__vdso_time")));
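
The GCC-8 warning this silences comes from the compiler checking that an alias and its target agree on their prototypes. A stand-alone sketch of the pattern (my_time/my_time_impl are hypothetical names, not the vDSO's):

	#include <time.h>

	time_t my_time_impl(time_t *t)
	{
		time_t now = 0;		/* stand-in for the real clock read */

		if (t)
			*t = now;
		return now;
	}

	/* The alias must repeat the target's exact prototype; declaring it
	 * as 'int my_time(time_t *)' is what GCC 8 warns about. */
	time_t my_time(time_t *t) __attribute__((weak, alias("my_time_impl")));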
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c0b0e9e8aa66..800104c8a3ed 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -266,6 +266,7 @@
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index b20f9d623f9c..8f09012b92e7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -236,11 +236,23 @@
  */
 #define EARLY_IDT_HANDLER_SIZE 9
 
+/*
+ * xen_early_idt_handler_array is for Xen pv guests: for each entry in
+ * early_idt_handler_array it contains a prequel in the form of
+ * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
+ * max 8 bytes.
+ */
+#define XEN_EARLY_IDT_HANDLER_SIZE 8
+
 #ifndef __ASSEMBLY__
 
 extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
 extern void early_ignore_irq(void);
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
+#endif
+
 /*
  * Load a segment. Fall back on loading the zero segment if something goes
  * wrong.  This variant assumes that loading zero fully clears the segment.
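
The 8-byte stride from the XEN_EARLY_IDT_HANDLER_SIZE comment above can be verified from the encodings: pop %rcx is one byte (0x59), pop %r11 carries a REX prefix (0x41 0x5b, two bytes), and a jmp with a 32-bit displacement is five bytes (0xe9 plus rel32). A compile-time sketch of that arithmetic:

	_Static_assert(1 + 2 + 5 == XEN_EARLY_IDT_HANDLER_SIZE,
		       "pop %rcx + pop %r11 + jmp rel32 must fit the stride");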
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 509046cfa5ce..877b5c1a1b12 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -173,40 +173,43 @@ static inline void cr4_init_shadow(void)
 	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
 }
 
+static inline void __cr4_set(unsigned long cr4)
+{
+	lockdep_assert_irqs_disabled();
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+	__write_cr4(cr4);
+}
+
 /* Set in this cpu's CR4. */
 static inline void cr4_set_bits(unsigned long mask)
 {
-	unsigned long cr4;
+	unsigned long cr4, flags;
 
+	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 | mask) != cr4) {
-		cr4 |= mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
-	}
+	if ((cr4 | mask) != cr4)
+		__cr4_set(cr4 | mask);
+	local_irq_restore(flags);
 }
 
 /* Clear in this cpu's CR4. */
 static inline void cr4_clear_bits(unsigned long mask)
 {
-	unsigned long cr4;
+	unsigned long cr4, flags;
 
+	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 & ~mask) != cr4) {
-		cr4 &= ~mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
-	}
+	if ((cr4 & ~mask) != cr4)
+		__cr4_set(cr4 & ~mask);
+	local_irq_restore(flags);
 }
 
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
 {
 	unsigned long cr4;
 
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	cr4 ^= mask;
-	this_cpu_write(cpu_tlbstate.cr4, cr4);
-	__write_cr4(cr4);
+	__cr4_set(cr4 ^ mask);
 }
 
 /* Read the CR4 shadow. */
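
The bug being closed here is a lost update on the per-CPU CR4 shadow. A sketch of the interleaving (X86_CR4_SOME_BIT is a placeholder for any bit an interrupt handler might set):

	/*
	 * task context                          interrupt on the same CPU
	 * ------------                          -------------------------
	 * cr4 = this_cpu_read(cpu_tlbstate.cr4);
	 *                                       cr4_set_bits(X86_CR4_SOME_BIT);
	 *                                         (shadow + hardware updated)
	 * cr4 |= mask;
	 * this_cpu_write(cpu_tlbstate.cr4, cr4);  <- stale value, IRQ's bit lost
	 * __write_cr4(cr4);                       <- and cleared in hardware too
	 *
	 * local_irq_save()/restore() around the read-modify-write (or the
	 * caller guaranteeing IRQs off, as cr4_toggle_bits_irqsoff() now
	 * requires) closes the window.
	 */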
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6a823a25eaff..750449152b04 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,8 +542,8 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 }
 
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
-			   struct irq_data *irqd, int ind)
+static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
+				  struct irq_data *irqd, int ind)
 {
 	unsigned int cpu, vector, prev_cpu, prev_vector;
 	struct apic_chip_data *apicd;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..bcb75dc97d44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 	case 0x17: init_amd_zn(c); break;
 	}
 
-	/* Enable workaround for FXSAVE leak */
-	if (c->x86 >= 6)
+	/*
+	 * Enable workaround for FXSAVE leak on CPUs
+	 * without a XSaveErPtr feature
+	 */
+	if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
 		set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
 
 	cpu_detect_cache_sizes(c);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 #define F14H_MPB_MAX_SIZE 1824
 #define F15H_MPB_MAX_SIZE 4096
 #define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
 
 	switch (family) {
 	case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 	case 0x16:
 		max_size = F16H_MPB_MAX_SIZE;
 		break;
+	case 0x17:
+		max_size = F17H_MPB_MAX_SIZE;
+		break;
 	default:
 		max_size = F1XH_MPB_MAX_SIZE;
 		break;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97fb3e5737f5..bb988a24db92 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -299,7 +299,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	}
 
 	if ((tifp ^ tifn) & _TIF_NOTSC)
-		cr4_toggle_bits(X86_CR4_TSD);
+		cr4_toggle_bits_irqsoff(X86_CR4_TSD);
 
 	if ((tifp ^ tifn) & _TIF_NOCPUID)
 		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3d01df7d7cf6..05a97d5fe298 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -237,7 +237,7 @@ static void notrace start_secondary(void *unused)
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 #endif
-
+	load_current_idt();
 	cpu_init();
 	x86_cpuinit.early_percpu_clock_init();
 	preempt_disable();
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3321b446b66c..88754bfd425f 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
 #include <linux/extable.h>
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
+#include <xen/xen.h>
 
 #include <asm/fpu/internal.h>
 #include <asm/traps.h>
@@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	 * Old CPUs leave the high bits of CS on the stack
 	 * undefined.  I'm not sure which CPUs do this, but at least
 	 * the 486 DX works this way.
+	 * Xen pv domains are not using the default __KERNEL_CS.
 	 */
-	if (regs->cs != __KERNEL_CS)
+	if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
 		goto fail;
 
 	/*
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01ab..526536c81ddc 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
 	 * We should get host bridge information from ACPI unless the BIOS
 	 * doesn't support it.
 	 */
-	if (acpi_os_get_root_pointer())
+	if (!acpi_disabled && acpi_os_get_root_pointer())
 		return 0;
 #endif
 
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index c34bd8233f7c..5f64f30873e2 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
 /*
  * UV NMI handler
  */
-int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 {
 	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
 	int cpu = smp_processor_id();
@@ -1013,7 +1013,7 @@ void uv_nmi_init(void)
 }
 
 /* Setup HUB NMI info */
-void __init uv_nmi_setup_common(bool hubbed)
+static void __init uv_nmi_setup_common(bool hubbed)
 {
 	int size = sizeof(void *) * (1 << NODES_SHIFT);
 	int cpu;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 84fcfde53f8f..5191de14f4df 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -226,8 +226,20 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
 #endif
 
+#ifdef CONFIG_X86_64
 	/*
-	 * segment registers
+	 * We need GSBASE restored before percpu access can work.
+	 * percpu access can happen in exception handlers or in complicated
+	 * helpers like load_gs_index().
+	 */
+	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#endif
+
+	fix_processor_context();
+
+	/*
+	 * Restore segment registers.  This happens after restoring the GDT
+	 * and LDT, which happen in fix_processor_context().
 	 */
 #ifdef CONFIG_X86_32
 	loadsegment(es, ctxt->es);
@@ -248,13 +260,14 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	load_gs_index(ctxt->gs);
 	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
 
+	/*
+	 * Restore FSBASE and user GSBASE after reloading the respective
+	 * segment selectors.
+	 */
 	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
 	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
 #endif
 
-	fix_processor_context();
-
 	do_fpu_end();
 	tsc_verify_tsc_adjust(true);
 	x86_platform.restore_sched_clock_state();
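
The constraint behind this reordering, condensed (a simplification of the 64-bit path above, not a drop-in replacement): percpu variables are addressed relative to GSBASE and exception handlers touch percpu data, so GSBASE must be valid before fix_processor_context() or any fault can run; the saved bases go last because reloading a selector clobbers its hidden base register.

	wrmsrl(MSR_GS_BASE, ctxt->gs_base);	/* 1: percpu access works again */

	fix_processor_context();		/* 2: GDT/LDT/TSS, may fault    */

	load_gs_index(ctxt->gs);		/* 3: resets the hidden base... */

	wrmsrl(MSR_FS_BASE, ctxt->fs_base);	/* 4: ...so bases are rewritten */
	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);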
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b2b3f3f6531..f2414c6c5e7c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -622,7 +622,7 @@ static struct trap_array_entry trap_array[] = {
 	{ simd_coprocessor_error,      xen_simd_coprocessor_error,      false },
 };
 
-static bool get_trap_addr(void **addr, unsigned int ist)
+static bool __ref get_trap_addr(void **addr, unsigned int ist)
 {
 	unsigned int nr;
 	bool ist_okay = false;
@@ -644,6 +644,14 @@ static bool get_trap_addr(void **addr, unsigned int ist)
 		}
 	}
 
+	if (nr == ARRAY_SIZE(trap_array) &&
+	    *addr >= (void *)early_idt_handler_array[0] &&
+	    *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
+		nr = (*addr - (void *)early_idt_handler_array[0]) /
+		     EARLY_IDT_HANDLER_SIZE;
+		*addr = (void *)xen_early_idt_handler_array[nr];
+	}
+
 	if (WARN_ON(ist != 0 && !ist_okay))
 		return false;
 
@@ -1262,6 +1270,21 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_setup_gdt(0);
 
 	xen_init_irq_ops();
+
+	/* Let's presume PV guests always boot on vCPU with id 0. */
+	per_cpu(xen_vcpu_id, 0) = 0;
+
+	/*
+	 * Setup xen_vcpu early because idt_setup_early_handler needs it for
+	 * local_irq_disable(), irqs_disabled().
+	 *
+	 * Don't do the full vcpu_info placement stuff until we have
+	 * the cpu_possible_mask and a non-dummy shared_info.
+	 */
+	xen_vcpu_info_reset(0);
+
+	idt_setup_early_handler();
+
 	xen_init_capabilities();
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -1295,18 +1318,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	acpi_numa = -1;
 #endif
-	/* Let's presume PV guests always boot on vCPU with id 0. */
-	per_cpu(xen_vcpu_id, 0) = 0;
-
-	/*
-	 * Setup xen_vcpu early because start_kernel needs it for
-	 * local_irq_disable(), irqs_disabled().
-	 *
-	 * Don't do the full vcpu_info placement stuff until we have
-	 * the cpu_possible_mask and a non-dummy shared_info.
-	 */
-	xen_vcpu_info_reset(0);
-
 	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
 
 	local_irq_disable();
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 8a10c9a9e2b5..417b339e5c8e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
 
 #include <xen/interface/xen.h>
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 
 .macro xen_pv_trap name
@@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat
 #endif
 xen_pv_trap hypervisor_callback
 
+	__INIT
+ENTRY(xen_early_idt_handler_array)
+	i = 0
+	.rept NUM_EXCEPTION_VECTORS
+	pop %rcx
+	pop %r11
+	jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+	i = i + 1
+	.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+	.endr
+END(xen_early_idt_handler_array)
+	__FINIT
+
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 /*
  * Xen64 iret frame:

* [GIT PULL] x86 fixes
@ 2017-11-26 12:48 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-11-26 12:48 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton, Andy Lutomirski

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 12a78d43de767eaf8fb272facb7a7b6f2dc6a9df x86/decoder: Add new TEST instruction pattern

Misc fixes:

 - topology enumeration fixes
 - KASAN fix
 - two entry fixes (not yet the big series related to KASLR)
 - remove obsolete code
 - instruction decoder fix
 - stricter /dev/mem sanity checks, which will hopefully hold up this time
 - pkeys fixes
 - two ACPI fixes
 - 5-level paging related fixes
 - UMIP fixes that should make application-visible faults more debuggable
 - boot fix for an unusual virtualization environment

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/pci/Kconfig                # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  drivers/pci/Makefile               # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  drivers/pci/htirq.c                # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  include/linux/htirq.h              # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  include/linux/pci.h                # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  tools/testing/selftests/x86/5lvl.c # 97f404ad3e53: x86/selftests: Add test for 
  tools/testing/selftests/x86/Makefile# 97f404ad3e53: x86/selftests: Add test for 
  tools/testing/selftests/x86/mpx-hw.h# a6400120d042: x86/mpx/selftests: Fix up we
  tools/testing/selftests/x86/pkey-helpers.h# 7b659ee3e1fe: x86/pkeys/selftests: Fix pro
  tools/testing/selftests/x86/protection_keys.c# 91c49c2deb96: x86/pkeys/selftests: Rename 

 Thanks,

	Ingo

------------------>
Andi Kleen (2):
      perf/x86/intel/uncore: Cache logical pkg id in uncore driver
      x86/topology: Avoid wasting 128k for package id array

Andrey Ryabinin (1):
      x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow

Andy Lutomirski (2):
      x86/entry/64: Fix entry_SYSCALL_64_after_hwframe() IRQ tracing
      x86/entry/64: Add missing irqflags tracing to native_load_gs_index()

Bjorn Helgaas (1):
      x86/PCI: Remove unused HyperTransport interrupt support

Borislav Petkov (1):
      x86/umip: Fix insn_get_code_seg_params()'s return value

Chao Fan (1):
      x86/boot/KASLR: Remove unused variable

Craig Bergstrom (1):
      x86/mm: Limit mmap() of /dev/mem to valid physical addresses

Dave Hansen (4):
      x86/pkeys: Update documentation about availability
      x86/mpx/selftests: Fix up weird arrays
      x86/pkeys/selftests: Rename 'si_pkey' to 'siginfo_pkey'
      x86/pkeys/selftests: Fix protection keys write() warning

Kirill A. Shutemov (2):
      x86/mm: Prevent non-MAP_FIXED mapping across DEFAULT_MAP_WINDOW border
      x86/selftests: Add test for mapping placement for 5-level paging

Masami Hiramatsu (1):
      x86/decoder: Add new TEST instruction pattern

Prarit Bhargava (1):
      x86/smpboot: Fix __max_logical_packages estimate

Ricardo Neri (4):
      x86/umip: Select X86_INTEL_UMIP by default
      x86/umip: Print a line in the boot log that UMIP has been enabled
      x86/umip: Identify the STR and SLDT instructions
      x86/umip: Print a warning into the syslog if UMIP-protected instructions are used

Tom Lendacky (1):
      x86/boot: Fix boot failure when SMP MP-table is based at 0

Vikas C Sajjan (2):
      x86/acpi: Handle SCI interrupts above legacy space gracefully
      x86/acpi: Reduce code duplication in mp_override_legacy_irq()


 Documentation/x86/protection-keys.txt         |   9 +-
 arch/x86/Kconfig                              |  14 +-
 arch/x86/boot/compressed/kaslr.c              |   5 +-
 arch/x86/entry/entry_64.S                     |  14 +-
 arch/x86/events/intel/uncore.c                |   4 +-
 arch/x86/events/intel/uncore.h                |   2 +-
 arch/x86/events/intel/uncore_snbep.c          |   2 +-
 arch/x86/include/asm/elf.h                    |   1 +
 arch/x86/include/asm/hw_irq.h                 |   8 --
 arch/x86/include/asm/hypertransport.h         |  46 ------
 arch/x86/include/asm/insn-eval.h              |   2 +-
 arch/x86/include/asm/io.h                     |   4 +
 arch/x86/include/asm/irqdomain.h              |   6 -
 arch/x86/include/asm/processor.h              |   1 +
 arch/x86/kernel/acpi/boot.c                   |  61 +++++---
 arch/x86/kernel/apic/Makefile                 |   1 -
 arch/x86/kernel/apic/htirq.c                  | 198 --------------------------
 arch/x86/kernel/apic/vector.c                 |   5 +-
 arch/x86/kernel/cpu/common.c                  |   2 +
 arch/x86/kernel/mpparse.c                     |   6 +-
 arch/x86/kernel/smpboot.c                     | 128 +++++------------
 arch/x86/kernel/sys_x86_64.c                  |  10 +-
 arch/x86/kernel/umip.c                        |  88 ++++++++++--
 arch/x86/lib/insn-eval.c                      |   4 +-
 arch/x86/lib/x86-opcode-map.txt               |   2 +-
 arch/x86/mm/hugetlbpage.c                     |  11 +-
 arch/x86/mm/kasan_init_64.c                   | 143 ++++++++++++++++++-
 arch/x86/mm/mmap.c                            |  62 ++++++++
 drivers/char/mem.c                            |   4 +
 drivers/pci/Kconfig                           |   9 --
 drivers/pci/Makefile                          |   3 -
 drivers/pci/htirq.c                           | 135 ------------------
 include/linux/htirq.h                         |  39 -----
 include/linux/pci.h                           |   6 -
 tools/testing/selftests/x86/5lvl.c            | 177 +++++++++++++++++++++++
 tools/testing/selftests/x86/Makefile          |   2 +-
 tools/testing/selftests/x86/mpx-hw.h          |   4 +-
 tools/testing/selftests/x86/pkey-helpers.h    |   5 +-
 tools/testing/selftests/x86/protection_keys.c |  10 +-
 39 files changed, 609 insertions(+), 624 deletions(-)
 delete mode 100644 arch/x86/include/asm/hypertransport.h
 delete mode 100644 arch/x86/kernel/apic/htirq.c
 delete mode 100644 drivers/pci/htirq.c
 delete mode 100644 include/linux/htirq.h
 create mode 100644 tools/testing/selftests/x86/5lvl.c

diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.txt
index fa46dcb347bc..ecb0d2dadfb7 100644
--- a/Documentation/x86/protection-keys.txt
+++ b/Documentation/x86/protection-keys.txt
@@ -1,5 +1,10 @@
-Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature
-which will be found on future Intel CPUs.
+Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
+which is found on Intel's Skylake "Scalable Processor" Server CPUs.
+It will be available in future non-server parts.
+
+For anyone wishing to test or use this feature, it is available in
+Amazon's EC2 C5 instances and is known to work there using an Ubuntu
+17.04 image.
 
 Memory Protection Keys provides a mechanism for enforcing page-based
 protections, but without requiring modification of the page tables
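
For anyone following up on the documentation change, a hedged usage sketch (assumes the glibc >= 2.27 pkey wrappers; deny_writes is a hypothetical helper, not part of the kernel or libc):

	#define _GNU_SOURCE
	#include <sys/mman.h>

	/* Tag an existing mapping so reads keep working but writes fault. */
	static int deny_writes(void *addr, size_t len)
	{
		int pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);

		if (pkey < 0)
			return -1;	/* CPU or kernel lacks protection keys */

		/* Stores to [addr, addr + len) now raise SIGSEGV with
		 * si_code SEGV_PKUERR until the key's rights are relaxed. */
		return pkey_mprotect(addr, len, PROT_READ | PROT_WRITE, pkey);
	}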
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f08977d82ca0..09dcc94c4484 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -110,7 +110,7 @@ config X86
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HUGE_VMAP		if X86_64 || X86_PAE
 	select HAVE_ARCH_JUMP_LABEL
-	select HAVE_ARCH_KASAN			if X86_64 && SPARSEMEM_VMEMMAP
+	select HAVE_ARCH_KASAN			if X86_64
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_ARCH_MMAP_RND_BITS		if MMU
@@ -1805,14 +1805,20 @@ config X86_SMAP
 	  If unsure, say Y.
 
 config X86_INTEL_UMIP
-	def_bool n
+	def_bool y
 	depends on CPU_SUP_INTEL
 	prompt "Intel User Mode Instruction Prevention" if EXPERT
 	---help---
 	  The User Mode Instruction Prevention (UMIP) is a security
 	  feature in newer Intel processors. If enabled, a general
-	  protection fault is issued if the instructions SGDT, SLDT,
-	  SIDT, SMSW and STR are executed in user mode.
+	  protection fault is issued if the SGDT, SLDT, SIDT, SMSW
+	  or STR instructions are executed in user mode. These instructions
+	  unnecessarily expose information about the hardware state.
+
+	  The vast majority of applications do not use these instructions.
+	  For the very few that do, software emulation is provided in
+	  specific cases in protected and virtual-8086 modes. Emulated
+	  results are dummy.
 
 config X86_INTEL_MPX
 	prompt "Intel MPX (Memory Protection Extensions)"
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a63fbc25ce84..8199a6187251 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -171,7 +171,6 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
 static void mem_avoid_memmap(char *str)
 {
 	static int i;
-	int rc;
 
 	if (i >= MAX_MEMMAP_REGIONS)
 		return;
@@ -219,7 +218,7 @@ static int handle_mem_memmap(void)
 		return 0;
 
 	tmp_cmdline = malloc(len + 1);
-	if (!tmp_cmdline )
+	if (!tmp_cmdline)
 		error("Failed to allocate space for tmp_cmdline");
 
 	memcpy(tmp_cmdline, args, len);
@@ -363,7 +362,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	cmd_line |= boot_params->hdr.cmd_line_ptr;
 	/* Calculate size of cmd_line. */
 	ptr = (char *)(unsigned long)cmd_line;
-	for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+	for (cmd_line_size = 0; ptr[cmd_line_size++];)
 		;
 	mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
 	mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a2b30ec69497..f81d50d7ceac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64)
 END(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
-.macro TRACE_IRQS_IRETQ
+.macro TRACE_IRQS_FLAGS flags:req
 #ifdef CONFIG_TRACE_IRQFLAGS
-	bt	$9, EFLAGS(%rsp)		/* interrupts off? */
+	bt	$9, \flags		/* interrupts off? */
 	jnc	1f
 	TRACE_IRQS_ON
 1:
 #endif
 .endm
 
+.macro TRACE_IRQS_IRETQ
+	TRACE_IRQS_FLAGS EFLAGS(%rsp)
+.endm
+
 /*
  * When dynamic function tracer is enabled it will add a breakpoint
  * to all locations that it is about to modify, sync CPUs, update
@@ -148,8 +152,6 @@ ENTRY(entry_SYSCALL_64)
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	TRACE_IRQS_OFF
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER_DS			/* pt_regs->ss */
 	pushq	PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
@@ -170,6 +172,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
 	UNWIND_HINT_REGS extra=0
 
+	TRACE_IRQS_OFF
+
 	/*
 	 * If we need to do entry work or if we guess we'll need to do
 	 * exit work, go straight to the slow path.
@@ -943,11 +947,13 @@ ENTRY(native_load_gs_index)
 	FRAME_BEGIN
 	pushfq
 	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
+	TRACE_IRQS_OFF
 	SWAPGS
 .Lgs_change:
 	movl	%edi, %gs
 2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
 	SWAPGS
+	TRACE_IRQS_FLAGS (%rsp)
 	popfq
 	FRAME_END
 	ret
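
The 'bt $9, \flags' test above reads EFLAGS bit 9, which is IF, the interrupt-enable flag; TRACE_IRQS_ON is only emitted when interrupts were actually on. The same predicate as a C helper:

	#include <stdbool.h>

	static bool irqs_enabled_in(unsigned long flags)
	{
		return flags & (1UL << 9);	/* EFLAGS bit 9 == IF */
	}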
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d45e06346f14..7874c980d569 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -975,10 +975,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 	int i, phys_id, pkg;
 
 	phys_id = uncore_pcibus_to_physid(pdev->bus);
-	pkg = topology_phys_to_logical_pkg(phys_id);
 
 	box = pci_get_drvdata(pdev);
 	if (!box) {
+		pkg = topology_phys_to_logical_pkg(phys_id);
 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
 			if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
 				uncore_extra_pci_dev[pkg].dev[i] = NULL;
@@ -994,7 +994,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 		return;
 
 	pci_set_drvdata(pdev, NULL);
-	pmu->boxes[pkg] = NULL;
+	pmu->boxes[box->pkgid] = NULL;
 	if (atomic_dec_return(&pmu->activeboxes) == 0)
 		uncore_pmu_unregister(pmu);
 	uncore_box_exit(box);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 4364191e7c6b..414dc7e7c950 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -100,7 +100,7 @@ struct intel_uncore_extra_reg {
 
 struct intel_uncore_box {
 	int pci_phys_id;
-	int pkgid;
+	int pkgid;	/* Logical package ID */
 	int n_active;	/* number of active events */
 	int n_events;
 	int cpu;	/* cpu to collect events */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 95cb19f4e06f..de8f8625213c 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1057,7 +1057,7 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
 
 	if (reg1->idx != EXTRA_REG_NONE) {
 		int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
-		int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+		int pkg = box->pkgid;
 		struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
 
 		if (filter_pdev) {
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 3a091cea36c5..0d157d2a1e2a 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -309,6 +309,7 @@ static inline int mmap_is_ia32(void)
 extern unsigned long task_size_32bit(void);
 extern unsigned long task_size_64bit(int full_addr_space);
 extern unsigned long get_mmap_base(int is_legacy);
+extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
 
 #ifdef CONFIG_X86_32
 
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b80e46733909..2851077b6051 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -99,14 +99,6 @@ struct irq_alloc_info {
 			void		*dmar_data;
 		};
 #endif
-#ifdef	CONFIG_HT_IRQ
-		struct {
-			int		ht_pos;
-			int		ht_idx;
-			struct pci_dev	*ht_dev;
-			void		*ht_update;
-		};
-#endif
 #ifdef	CONFIG_X86_UV
 		struct {
 			int		uv_limit;
diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h
deleted file mode 100644
index 5d55df352879..000000000000
--- a/arch/x86/include/asm/hypertransport.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_HYPERTRANSPORT_H
-#define _ASM_X86_HYPERTRANSPORT_H
-
-/*
- * Constants for x86 Hypertransport Interrupts.
- */
-
-#define HT_IRQ_LOW_BASE			0xf8000000
-
-#define HT_IRQ_LOW_VECTOR_SHIFT		16
-#define HT_IRQ_LOW_VECTOR_MASK		0x00ff0000
-#define HT_IRQ_LOW_VECTOR(v)						\
-	(((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK)
-
-#define HT_IRQ_LOW_DEST_ID_SHIFT	8
-#define HT_IRQ_LOW_DEST_ID_MASK		0x0000ff00
-#define HT_IRQ_LOW_DEST_ID(v)						\
-	(((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK)
-
-#define HT_IRQ_LOW_DM_PHYSICAL		0x0000000
-#define HT_IRQ_LOW_DM_LOGICAL		0x0000040
-
-#define HT_IRQ_LOW_RQEOI_EDGE		0x0000000
-#define HT_IRQ_LOW_RQEOI_LEVEL		0x0000020
-
-
-#define HT_IRQ_LOW_MT_FIXED		0x0000000
-#define HT_IRQ_LOW_MT_ARBITRATED	0x0000004
-#define HT_IRQ_LOW_MT_SMI		0x0000008
-#define HT_IRQ_LOW_MT_NMI		0x000000c
-#define HT_IRQ_LOW_MT_INIT		0x0000010
-#define HT_IRQ_LOW_MT_STARTUP		0x0000014
-#define HT_IRQ_LOW_MT_EXTINT		0x0000018
-#define HT_IRQ_LOW_MT_LINT1		0x000008c
-#define HT_IRQ_LOW_MT_LINT0		0x0000098
-
-#define HT_IRQ_LOW_IRQ_MASKED		0x0000001
-
-
-#define HT_IRQ_HIGH_DEST_ID_SHIFT	0
-#define HT_IRQ_HIGH_DEST_ID_MASK	0x00ffffff
-#define HT_IRQ_HIGH_DEST_ID(v)						\
-	((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
-
-#endif /* _ASM_X86_HYPERTRANSPORT_H */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index e1d3b4ce8a92..2b6ccf2c49f1 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -18,6 +18,6 @@
 void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
 int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
 unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
-char insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_code_seg_params(struct pt_regs *regs);
 
 #endif /* _ASM_X86_INSN_EVAL_H */
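
The widened return type matters because the function hands back negative error codes, and whether plain char is signed is ABI-dependent. A minimal illustration (hypothetical function name):

	#include <errno.h>

	char broken_get_params(void)
	{
		return -EINVAL;	/* -22 becomes 234 where char is unsigned */
	}

	/* A caller's 'if (broken_get_params() < 0)' then never fires,
	 * which is why the prototype was changed to return int. */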
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 93ae8aee1780..95e948627fd0 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -111,6 +111,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
 
 #endif
 
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
 /**
  *	virt_to_phys	-	map virtual addresses to physical
  *	@address: address to remap
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index f695cc6b8e1f..139feef467f7 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -56,10 +56,4 @@ extern void arch_init_msi_domain(struct irq_domain *domain);
 static inline void arch_init_msi_domain(struct irq_domain *domain) { }
 #endif
 
-#ifdef CONFIG_HT_IRQ
-extern void arch_init_htirq_domain(struct irq_domain *domain);
-#else
-static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
-#endif
-
 #endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2db7cf720b04..cc16fa882e3e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,7 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 	u32			microcode;
+	unsigned		initialized : 1;
 } __randomize_layout;
 
 struct cpuid_regs {
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ef9e02e614d0..f4c463df8b08 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
 #ifdef CONFIG_X86_IO_APIC
 #define MP_ISA_BUS		0
 
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+						u8 trigger, u32 gsi);
+
 static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 					  u32 gsi)
 {
-	int ioapic;
-	int pin;
-	struct mpc_intsrc mp_irq;
-
 	/*
 	 * Check bus_irq boundary.
 	 */
@@ -357,14 +356,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 		return;
 	}
 
-	/*
-	 * Convert 'gsi' to 'ioapic.pin'.
-	 */
-	ioapic = mp_find_ioapic(gsi);
-	if (ioapic < 0)
-		return;
-	pin = mp_find_ioapic_pin(ioapic, gsi);
-
 	/*
 	 * TBD: This check is for faulty timer entries, where the override
 	 *      erroneously sets the trigger to level, resulting in a HUGE
@@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 	if ((bus_irq == 0) && (trigger == 3))
 		trigger = 1;
 
-	mp_irq.type = MP_INTSRC;
-	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (trigger << 2) | polarity;
-	mp_irq.srcbus = MP_ISA_BUS;
-	mp_irq.srcbusirq = bus_irq;	/* IRQ */
-	mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
-	mp_irq.dstirq = pin;	/* INTIN# */
-
-	mp_save_irq(&mp_irq);
-
+	if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+		return;
 	/*
 	 * Reset default identity mapping if gsi is also an legacy IRQ,
 	 * otherwise there will be more than one entry with the same GSI
@@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
 	return 0;
 }
 
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+						u8 trigger, u32 gsi)
+{
+	struct mpc_intsrc mp_irq;
+	int ioapic, pin;
+
+	/* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0) {
+		pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+		return ioapic;
+	}
+
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger << 2) | polarity;
+	mp_irq.srcbus = MP_ISA_BUS;
+	mp_irq.srcbusirq = bus_irq;
+	mp_irq.dstapic = mpc_ioapic_id(ioapic);
+	mp_irq.dstirq = pin;
+
+	mp_save_irq(&mp_irq);
+
+	return 0;
+}
+
 static int __init
 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
@@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
 	if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
 		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
-	mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+	if (bus_irq < NR_IRQS_LEGACY)
+		mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+	else
+		mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
 	acpi_penalize_sci_irq(bus_irq, trigger, polarity);
 
 	/*
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index a9e08924927e..a6fcaf16cdbf 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -12,7 +12,6 @@ obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_PCI_MSI)		+= msi.o
-obj-$(CONFIG_HT_IRQ)		+= htirq.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
deleted file mode 100644
index b07075dce8b7..000000000000
--- a/arch/x86/kernel/apic/htirq.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Support Hypertransport IRQ
- *
- * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
- *	Moved from arch/x86/kernel/apic/io_apic.c.
- * Jiang Liu <jiang.liu@linux.intel.com>
- *	Add support of hierarchical irqdomain
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/htirq.h>
-#include <asm/irqdomain.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/hypertransport.h>
-
-static struct irq_domain *htirq_domain;
-
-/*
- * Hypertransport interrupt support
- */
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_data *parent = data->parent_data;
-	int ret;
-
-	ret = parent->chip->irq_set_affinity(parent, mask, force);
-	if (ret >= 0) {
-		struct ht_irq_msg msg;
-		struct irq_cfg *cfg = irqd_cfg(data);
-
-		fetch_ht_irq_msg(data->irq, &msg);
-		msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
-				    HT_IRQ_LOW_DEST_ID_MASK);
-		msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
-				  HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
-		msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-		msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
-		write_ht_irq_msg(data->irq, &msg);
-	}
-
-	return ret;
-}
-
-static struct irq_chip ht_irq_chip = {
-	.name			= "PCI-HT",
-	.irq_mask		= mask_ht_irq,
-	.irq_unmask		= unmask_ht_irq,
-	.irq_ack		= irq_chip_ack_parent,
-	.irq_set_affinity	= ht_set_affinity,
-	.irq_retrigger		= irq_chip_retrigger_hierarchy,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
-			      unsigned int nr_irqs, void *arg)
-{
-	struct ht_irq_cfg *ht_cfg;
-	struct irq_alloc_info *info = arg;
-	struct pci_dev *dev;
-	irq_hw_number_t hwirq;
-	int ret;
-
-	if (nr_irqs > 1 || !info)
-		return -EINVAL;
-
-	dev = info->ht_dev;
-	hwirq = (info->ht_idx & 0xFF) |
-		PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
-		(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
-	if (irq_find_mapping(domain, hwirq) > 0)
-		return -EEXIST;
-
-	ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
-	if (!ht_cfg)
-		return -ENOMEM;
-
-	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
-	if (ret < 0) {
-		kfree(ht_cfg);
-		return ret;
-	}
-
-	/* Initialize msg to a value that will never match the first write. */
-	ht_cfg->msg.address_lo = 0xffffffff;
-	ht_cfg->msg.address_hi = 0xffffffff;
-	ht_cfg->dev = info->ht_dev;
-	ht_cfg->update = info->ht_update;
-	ht_cfg->pos = info->ht_pos;
-	ht_cfg->idx = 0x10 + (info->ht_idx * 2);
-	irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
-			    handle_edge_irq, ht_cfg, "edge");
-
-	return 0;
-}
-
-static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
-			      unsigned int nr_irqs)
-{
-	struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
-
-	BUG_ON(nr_irqs != 1);
-	kfree(irq_data->chip_data);
-	irq_domain_free_irqs_top(domain, virq, nr_irqs);
-}
-
-static int htirq_domain_activate(struct irq_domain *domain,
-				 struct irq_data *irq_data, bool early)
-{
-	struct ht_irq_msg msg;
-	struct irq_cfg *cfg = irqd_cfg(irq_data);
-
-	msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
-	msg.address_lo =
-		HT_IRQ_LOW_BASE |
-		HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
-		HT_IRQ_LOW_VECTOR(cfg->vector) |
-		((apic->irq_dest_mode == 0) ?
-			HT_IRQ_LOW_DM_PHYSICAL :
-			HT_IRQ_LOW_DM_LOGICAL) |
-		HT_IRQ_LOW_RQEOI_EDGE |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			HT_IRQ_LOW_MT_FIXED :
-			HT_IRQ_LOW_MT_ARBITRATED) |
-		HT_IRQ_LOW_IRQ_MASKED;
-	write_ht_irq_msg(irq_data->irq, &msg);
-	return 0;
-}
-
-static void htirq_domain_deactivate(struct irq_domain *domain,
-				    struct irq_data *irq_data)
-{
-	struct ht_irq_msg msg;
-
-	memset(&msg, 0, sizeof(msg));
-	write_ht_irq_msg(irq_data->irq, &msg);
-}
-
-static const struct irq_domain_ops htirq_domain_ops = {
-	.alloc		= htirq_domain_alloc,
-	.free		= htirq_domain_free,
-	.activate	= htirq_domain_activate,
-	.deactivate	= htirq_domain_deactivate,
-};
-
-void __init arch_init_htirq_domain(struct irq_domain *parent)
-{
-	struct fwnode_handle *fn;
-
-	if (disable_apic)
-		return;
-
-	fn = irq_domain_alloc_named_fwnode("PCI-HT");
-	if (!fn)
-		goto warn;
-
-	htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
-	irq_domain_free_fwnode(fn);
-	if (!htirq_domain)
-		goto warn;
-
-	htirq_domain->parent = parent;
-	return;
-
-warn:
-	pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
-}
-
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
-		      ht_irq_update_t *update)
-{
-	struct irq_alloc_info info;
-
-	if (!htirq_domain)
-		return -ENOSYS;
-
-	init_irq_alloc_info(&info, NULL);
-	info.ht_idx = idx;
-	info.ht_pos = pos;
-	info.ht_dev = dev;
-	info.ht_update = update;
-
-	return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
-				     &info);
-}
-
-void arch_teardown_ht_irq(unsigned int irq)
-{
-	irq_domain_free_irqs(irq, 1);
-}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 05c85e693a5d..6a823a25eaff 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -1,5 +1,5 @@
 /*
- * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ * Local APIC related interfaces to support IOAPIC, MSI, etc.
  *
  * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
  *	Moved from arch/x86/kernel/apic/io_apic.c.
@@ -601,7 +601,7 @@ int __init arch_probe_nr_irqs(void)
 		nr_irqs = NR_VECTORS * nr_cpu_ids;
 
 	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+#if defined(CONFIG_PCI_MSI)
 	/*
 	 * for MSI and HT dyn irq
 	 */
@@ -663,7 +663,6 @@ int __init arch_early_irq_init(void)
 	irq_set_default_host(x86_vector_domain);
 
 	arch_init_msi_domain(x86_vector_domain);
-	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 13ae9e5eec2f..fa998ca8aa5a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -341,6 +341,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
 
 	cr4_set_bits(X86_CR4_UMIP);
 
+	pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+
 	return;
 
 out:
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 410c5dadcee3..3a4b12809ab5 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -431,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 }
 
 static unsigned long mpf_base;
+static bool mpf_found;
 
 static unsigned long __init get_mpc_size(unsigned long physptr)
 {
@@ -504,7 +505,7 @@ void __init default_get_smp_config(unsigned int early)
 	if (!smp_found_config)
 		return;
 
-	if (!mpf_base)
+	if (!mpf_found)
 		return;
 
 	if (acpi_lapic && early)
@@ -593,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
 			smp_found_config = 1;
 #endif
 			mpf_base = base;
+			mpf_found = true;
 
 			pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
 				base, base + sizeof(*mpf) - 1, mpf);
@@ -858,7 +860,7 @@ static int __init update_mp_table(void)
 	if (!enable_update_mptable)
 		return 0;
 
-	if (!mpf_base)
+	if (!mpf_found)
 		return 0;
 
 	mpf = early_memremap(mpf_base, sizeof(*mpf));
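
The reason a separate flag is needed: physical address 0 is a legitimate location for the MP floating pointer structure (the unusual virtualization environment from the summary places it there), so testing the base for zero conflates "not found" with "found at 0":

	if (!mpf_base)		/* buggy: also true when the table is at 0x0 */
		return;

	if (!mpf_found)		/* fixed: found-ness is tracked explicitly   */
		return;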
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5f59e6bee123..3d01df7d7cf6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 /* Logical package management. We might want to allocate that dynamically */
-static int *physical_to_logical_pkg __read_mostly;
-static unsigned long *physical_package_map __read_mostly;;
-static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
 static unsigned int logical_packages __read_mostly;
@@ -280,6 +277,25 @@ static void notrace start_secondary(void *unused)
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+		if (c->initialized && c->phys_proc_id == phys_pkg)
+			return c->logical_proc_id;
+	}
+	return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
 /**
  * topology_update_package_map - Update the physical to logical package map
  * @pkg:	The physical package id as retrieved via CPUID
@@ -287,102 +303,23 @@ static void notrace start_secondary(void *unused)
  */
 int topology_update_package_map(unsigned int pkg, unsigned int cpu)
 {
-	unsigned int new;
+	int new;
 
-	/* Called from early boot ? */
-	if (!physical_package_map)
-		return 0;
-
-	if (pkg >= max_physical_pkg_id)
-		return -EINVAL;
-
-	/* Set the logical package id */
-	if (test_and_set_bit(pkg, physical_package_map))
+	/* Already available somewhere? */
+	new = topology_phys_to_logical_pkg(pkg);
+	if (new >= 0)
 		goto found;
 
-	if (logical_packages >= __max_logical_packages) {
-		pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
-			logical_packages, cpu, __max_logical_packages);
-		return -ENOSPC;
-	}
-
 	new = logical_packages++;
 	if (new != pkg) {
 		pr_info("CPU %u Converting physical %u to logical package %u\n",
 			cpu, pkg, new);
 	}
-	physical_to_logical_pkg[pkg] = new;
-
 found:
-	cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+	cpu_data(cpu).logical_proc_id = new;
 	return 0;
 }
 
-/**
- * topology_phys_to_logical_pkg - Map a physical package id to a logical
- *
- * Returns logical package id or -1 if not found
- */
-int topology_phys_to_logical_pkg(unsigned int phys_pkg)
-{
-	if (phys_pkg >= max_physical_pkg_id)
-		return -1;
-	return physical_to_logical_pkg[phys_pkg];
-}
-EXPORT_SYMBOL(topology_phys_to_logical_pkg);
-
-static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
-{
-	unsigned int ncpus;
-	size_t size;
-
-	/*
-	 * Today neither Intel nor AMD support heterogenous systems. That
-	 * might change in the future....
-	 *
-	 * While ideally we'd want '* smp_num_siblings' in the below @ncpus
-	 * computation, this won't actually work since some Intel BIOSes
-	 * report inconsistent HT data when they disable HT.
-	 *
-	 * In particular, they reduce the APIC-IDs to only include the cores,
-	 * but leave the CPUID topology to say there are (2) siblings.
-	 * This means we don't know how many threads there will be until
-	 * after the APIC enumeration.
-	 *
-	 * By not including this we'll sometimes over-estimate the number of
-	 * logical packages by the amount of !present siblings, but this is
-	 * still better than MAX_LOCAL_APIC.
-	 *
-	 * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
-	 * on the command line leading to a similar issue as the HT disable
-	 * problem because the hyperthreads are usually enumerated after the
-	 * primary cores.
-	 */
-	ncpus = boot_cpu_data.x86_max_cores;
-	if (!ncpus) {
-		pr_warn("x86_max_cores == zero !?!?");
-		ncpus = 1;
-	}
-
-	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
-	logical_packages = 0;
-
-	/*
-	 * Possibly larger than what we need as the number of apic ids per
-	 * package can be smaller than the actual used apic ids.
-	 */
-	max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
-	size = max_physical_pkg_id * sizeof(unsigned int);
-	physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
-	memset(physical_to_logical_pkg, 0xff, size);
-	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
-	physical_package_map = kzalloc(size, GFP_KERNEL);
-
-	pr_info("Max logical packages: %u\n", __max_logical_packages);
-
-	topology_update_package_map(c->phys_proc_id, cpu);
-}
-
 void __init smp_store_boot_cpu_info(void)
 {
 	int id = 0; /* CPU 0 */
@@ -390,7 +327,8 @@ void __init smp_store_boot_cpu_info(void)
 
 	*c = boot_cpu_data;
 	c->cpu_index = id;
-	smp_init_package_map(c, id);
+	topology_update_package_map(c->phys_proc_id, id);
+	c->initialized = true;
 }
 
 /*
@@ -401,13 +339,16 @@ void smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = &cpu_data(id);
 
-	*c = boot_cpu_data;
+	/* Copy boot_cpu_data only on the first bringup */
+	if (!c->initialized)
+		*c = boot_cpu_data;
 	c->cpu_index = id;
 	/*
 	 * During boot time, CPU0 has this setup already. Save the info when
 	 * bringing up AP or offlined CPU0.
 	 */
 	identify_secondary_cpu(c);
+	c->initialized = true;
 }
 
 static bool
@@ -1356,7 +1297,16 @@ void __init native_smp_prepare_boot_cpu(void)
 
 void __init native_smp_cpus_done(unsigned int max_cpus)
 {
+	int ncpus;
+
 	pr_debug("Boot done\n");
+	/*
+	 * Today neither Intel nor AMD support heterogenous systems so
+	 * extrapolate the boot cpu's data to all packages.
+	 */
+	ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+	pr_info("Max logical packages: %u\n", __max_logical_packages);
 
 	if (x86_has_numa_in_package)
 		set_sched_topology(x86_numa_in_package_topology);
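
To illustrate the package arithmetic above with hypothetical numbers (not taken
from any particular machine): 8 booted cores per package with 2 SMT siblings and
nr_cpu_ids == 32 gives DIV_ROUND_UP(32, 16) == 2 logical packages. A stand-alone
C sketch:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		int booted_cores = 8;		/* cores per package (assumed) */
		int smp_num_siblings = 2;	/* SMT threads per core (assumed) */
		int nr_cpu_ids = 32;		/* possible CPU ids (assumed) */
		int ncpus = booted_cores * smp_num_siblings;

		/* mirrors the __max_logical_packages computation above */
		printf("Max logical packages: %d\n",
		       DIV_ROUND_UP(nr_cpu_ids, ncpus));	/* prints 2 */
		return 0;
	}
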
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index a63fe77b3217..676774b9bb8d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -188,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+	/* No address checking. See comment at mmap_address_hint_valid() */
 	if (flags & MAP_FIXED)
 		return addr;
 
@@ -197,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	/* requesting a specific address */
 	if (addr) {
-		addr = PAGE_ALIGN(addr);
+		addr &= PAGE_MASK;
+		if (!mmap_address_hint_valid(addr, len))
+			goto get_unmapped_area;
+
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vm_start_gap(vma)))
+		if (!vma || addr + len <= vm_start_gap(vma))
 			return addr;
 	}
+get_unmapped_area:
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 6ba82be68cff..f44ce0fb3583 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -78,7 +78,60 @@
 
 #define	UMIP_INST_SGDT	0	/* 0F 01 /0 */
 #define	UMIP_INST_SIDT	1	/* 0F 01 /1 */
-#define	UMIP_INST_SMSW	3	/* 0F 01 /4 */
+#define	UMIP_INST_SMSW	2	/* 0F 01 /4 */
+#define	UMIP_INST_SLDT  3       /* 0F 00 /0 */
+#define	UMIP_INST_STR   4       /* 0F 00 /1 */
+
+const char * const umip_insns[5] = {
+	[UMIP_INST_SGDT] = "SGDT",
+	[UMIP_INST_SIDT] = "SIDT",
+	[UMIP_INST_SMSW] = "SMSW",
+	[UMIP_INST_SLDT] = "SLDT",
+	[UMIP_INST_STR] = "STR",
+};
+
+#define umip_pr_err(regs, fmt, ...) \
+	umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
+#define umip_pr_warning(regs, fmt, ...) \
+	umip_printk(regs, KERN_WARNING, fmt,  ##__VA_ARGS__)
+
+/**
+ * umip_printk() - Print a rate-limited message
+ * @regs:	Register set with the context in which the warning is printed
+ * @log_level:	Kernel log level to print the message
+ * @fmt:	The text string to print
+ *
+ * Print the text contained in @fmt. The print rate is limited to bursts of 5
+ * messages every two minutes. The purpose of this customized version of
+ * printk() is to print messages when user space processes use any of the
+ * UMIP-protected instructions. Thus, the printed text is prepended with the
+ * task name and process ID number of the current task as well as the
+ * instruction and stack pointers in @regs as seen when entering kernel mode.
+ *
+ * Returns:
+ *
+ * None.
+ */
+static __printf(3, 4)
+void umip_printk(const struct pt_regs *regs, const char *log_level,
+		 const char *fmt, ...)
+{
+	/* Bursts of 5 messages every two minutes */
+	static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
+	struct task_struct *tsk = current;
+	struct va_format vaf;
+	va_list args;
+
+	if (!__ratelimit(&ratelimit))
+		return;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
+	       task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
+	va_end(args);
+}
 
 /**
  * identify_insn() - Identify a UMIP-protected instruction
@@ -118,10 +171,16 @@ static int identify_insn(struct insn *insn)
 		default:
 			return -EINVAL;
 		}
+	} else if (insn->opcode.bytes[1] == 0x0) {
+		if (X86_MODRM_REG(insn->modrm.value) == 0)
+			return UMIP_INST_SLDT;
+		else if (X86_MODRM_REG(insn->modrm.value) == 1)
+			return UMIP_INST_STR;
+		else
+			return -EINVAL;
+	} else {
+		return -EINVAL;
 	}
-
-	/* SLDT AND STR are not emulated */
-	return -EINVAL;
 }
 
 /**
@@ -228,10 +287,8 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
 	if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
 		return;
 
-	pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n",
-			   tsk->comm, task_pid_nr(tsk), regs->ip,
-			   regs->sp, X86_PF_USER | X86_PF_WRITE,
-			   regs->ip);
+	umip_pr_err(regs, "segfault in emulation. error%x\n",
+		    X86_PF_USER | X86_PF_WRITE);
 }
 
 /**
@@ -262,15 +319,11 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	unsigned char buf[MAX_INSN_SIZE];
 	void __user *uaddr;
 	struct insn insn;
-	char seg_defs;
+	int seg_defs;
 
 	if (!regs)
 		return false;
 
-	/* Do not emulate 64-bit processes. */
-	if (user_64bit_mode(regs))
-		return false;
-
 	/*
 	 * If not in user-space long mode, a custom code segment could be in
 	 * use. This is true in protected mode (if the process defined a local
@@ -322,6 +375,15 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	if (umip_inst < 0)
 		return false;
 
+	umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
+			umip_insns[umip_inst]);
+
+	/* Do not emulate SLDT, STR or user long mode processes. */
+	if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
+		return false;
+
+	umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
+
 	if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
 		return false;
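
As a user-space illustration of the path above, a minimal program that executes
SMSW, one of the UMIP-protected instructions. On hardware with CR4.UMIP set it
traps with #GP and reaches fixup_umip_exception(); on systems without UMIP it
simply executes:

	#include <stdio.h>

	int main(void)
	{
		unsigned long msw;

		/* SMSW stores the machine status word (low bits of CR0) */
		asm volatile("smsw %0" : "=r" (msw));
		printf("SMSW returned: %#lx\n", msw);
		return 0;
	}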
 
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 35625d279458..9119d8e41f1f 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -733,11 +733,11 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
  *
  * Returns:
  *
- * A signed 8-bit value containing the default parameters on success.
+ * An int containing ORed-in default parameters on success.
  *
  * -EINVAL on error.
  */
-char insn_get_code_seg_params(struct pt_regs *regs)
+int insn_get_code_seg_params(struct pt_regs *regs)
 {
 	struct desc_struct *desc;
 	short sel;
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 12e377184ee4..c4d55919fac1 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8ae0000cbdb3..00b296617ca4 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -158,6 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+	/* No address checking. See comment at mmap_address_hint_valid() */
 	if (flags & MAP_FIXED) {
 		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
@@ -165,12 +166,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	}
 
 	if (addr) {
-		addr = ALIGN(addr, huge_page_size(h));
+		addr &= huge_page_mask(h);
+		if (!mmap_address_hint_valid(addr, len))
+			goto get_unmapped_area;
+
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vm_start_gap(vma)))
+		if (!vma || addr + len <= vm_start_gap(vma))
 			return addr;
 	}
+
+get_unmapped_area:
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
 		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
 				pgoff, flags);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 2b60dc6e64b1..99dfed6dfef8 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -4,12 +4,14 @@
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
 #include <linux/kdebug.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
 #include <linux/vmalloc.h>
 
 #include <asm/e820/types.h>
+#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
@@ -18,7 +20,134 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 
-static int __init map_range(struct range *range)
+static __init void *early_alloc(size_t size, int nid)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, size,
+		__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+}
+
+static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
+				      unsigned long end, int nid)
+{
+	pte_t *pte;
+
+	if (pmd_none(*pmd)) {
+		void *p;
+
+		if (boot_cpu_has(X86_FEATURE_PSE) &&
+		    ((end - addr) == PMD_SIZE) &&
+		    IS_ALIGNED(addr, PMD_SIZE)) {
+			p = early_alloc(PMD_SIZE, nid);
+			if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
+				return;
+			else if (p)
+				memblock_free(__pa(p), PMD_SIZE);
+		}
+
+		p = early_alloc(PAGE_SIZE, nid);
+		pmd_populate_kernel(&init_mm, pmd, p);
+	}
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		pte_t entry;
+		void *p;
+
+		if (!pte_none(*pte))
+			continue;
+
+		p = early_alloc(PAGE_SIZE, nid);
+		entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
+		set_pte_at(&init_mm, addr, pte, entry);
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
+				      unsigned long end, int nid)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	if (pud_none(*pud)) {
+		void *p;
+
+		if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
+		    ((end - addr) == PUD_SIZE) &&
+		    IS_ALIGNED(addr, PUD_SIZE)) {
+			p = early_alloc(PUD_SIZE, nid);
+			if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
+				return;
+			else if (p)
+				memblock_free(__pa(p), PUD_SIZE);
+		}
+
+		p = early_alloc(PAGE_SIZE, nid);
+		pud_populate(&init_mm, pud, p);
+	}
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (!pmd_large(*pmd))
+			kasan_populate_pmd(pmd, addr, next, nid);
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
+				      unsigned long end, int nid)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	if (p4d_none(*p4d)) {
+		void *p = early_alloc(PAGE_SIZE, nid);
+
+		p4d_populate(&init_mm, p4d, p);
+	}
+
+	pud = pud_offset(p4d, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (!pud_large(*pud))
+			kasan_populate_pud(pud, addr, next, nid);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
+				      unsigned long end, int nid)
+{
+	void *p;
+	p4d_t *p4d;
+	unsigned long next;
+
+	if (pgd_none(*pgd)) {
+		p = early_alloc(PAGE_SIZE, nid);
+		pgd_populate(&init_mm, pgd, p);
+	}
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		kasan_populate_p4d(p4d, addr, next, nid);
+	} while (p4d++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
+					 int nid)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	addr = addr & PAGE_MASK;
+	end = round_up(end, PAGE_SIZE);
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		kasan_populate_pgd(pgd, addr, next, nid);
+	} while (pgd++, addr = next, addr != end);
+}
+
+static void __init map_range(struct range *range)
 {
 	unsigned long start;
 	unsigned long end;
@@ -26,7 +155,7 @@ static int __init map_range(struct range *range)
 	start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
 	end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
 
-	return vmemmap_populate(start, end, NUMA_NO_NODE);
+	kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
 }
 
 static void __init clear_pgds(unsigned long start,
@@ -189,16 +318,16 @@ void __init kasan_init(void)
 		if (pfn_mapped[i].end == 0)
 			break;
 
-		if (map_range(&pfn_mapped[i]))
-			panic("kasan: unable to allocate shadow!");
+		map_range(&pfn_mapped[i]);
 	}
+
 	kasan_populate_zero_shadow(
 		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
 		kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
-	vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
-			(unsigned long)kasan_mem_to_shadow(_end),
-			NUMA_NO_NODE);
+	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
+			      (unsigned long)kasan_mem_to_shadow(_end),
+			      early_pfn_to_nid(__pa(_stext)));
 
 	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
 			(void *)KASAN_SHADOW_END);
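
The populate functions above all follow the same walk idiom: check pX_none()
and allocate, then iterate [addr, end) in entry-sized steps via pX_addr_end().
A stand-alone sketch of that loop for one level (illustrative constants, not
the kernel helpers):

	#include <stdio.h>

	#define PMD_SIZE	(1UL << 21)
	#define PMD_MASK	(~(PMD_SIZE - 1))

	/* end of the current PMD entry, clamped to 'end' */
	static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
	{
		unsigned long boundary = (addr + PMD_SIZE) & PMD_MASK;

		return boundary < end ? boundary : end;
	}

	int main(void)
	{
		unsigned long addr = 0x1ff000, end = 0x600000, next;

		do {
			next = pmd_addr_end(addr, end);
			printf("populate [%#lx, %#lx)\n", addr, next);
		} while (addr = next, addr != end);

		return 0;
	}
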
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a99679826846..155ecbac9e28 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -33,6 +33,8 @@
 #include <linux/compat.h>
 #include <asm/elf.h>
 
+#include "physaddr.h"
+
 struct va_alignment __read_mostly va_align = {
 	.flags = -1,
 };
@@ -174,3 +176,63 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 		return "[mpx]";
 	return NULL;
 }
+
+/**
+ * mmap_address_hint_valid - Validate the address hint of mmap
+ * @addr:	Address hint
+ * @len:	Mapping length
+ *
+ * Check whether @addr and @addr + @len result in a valid mapping.
+ *
+ * On 32bit this only checks whether @addr + @len is <= TASK_SIZE.
+ *
+ * On 64bit with 5-level page tables another sanity check is required
+ * because mappings requested by mmap(@addr, 0) which cross the 47-bit
+ * virtual address boundary can cause the following theoretical issue:
+ *
+ *  An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr
+ *  is below the border of the 47-bit address space and @addr + @len is
+ *  above the border.
+ *
+ *  With 4-level paging this request succeeds, but the resulting mapping
+ *  address will always be within the 47-bit virtual address space, because
+ *  the hint address does not result in a valid mapping and is
+ *  ignored. Hence applications which are not prepared to handle virtual
+ *  addresses above 47-bit work correctly.
+ *
+ *  With 5-level paging this request would be granted and result in a
+ *  mapping which crosses the border of the 47-bit virtual address
+ *  space. If the application cannot handle addresses above 47-bit this
+ *  will lead to misbehaviour and hard to diagnose failures.
+ *
+ * Therefore ignore address hints which would result in a mapping crossing
+ * the 47-bit virtual address boundary.
+ *
+ * Note, that in the same scenario with MAP_FIXED the behaviour is
+ * different. The request with @addr < 47-bit and @addr + @len > 47-bit
+ * fails on a 4-level paging machine but succeeds on a 5-level paging
+ * machine. It is reasonable to expect that an application does not rely on
+ * the failure of such a fixed mapping request, so the restriction is not
+ * applied.
+ */
+bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
+{
+	if (TASK_SIZE - len < addr)
+		return false;
+
+	return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
+}
+
+/* Can we access it for direct reading/writing? Must be RAM: */
+int valid_phys_addr_range(phys_addr_t addr, size_t count)
+{
+	return addr + count <= __pa(high_memory);
+}
+
+/* Can we access it through mmap? Must be a valid physical address: */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
+{
+	phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
+
+	return phys_addr_valid(addr + count - 1);
+}
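
The boundary predicate can be checked in isolation. A sketch with assumed
constants (a DEFAULT_MAP_WINDOW of 1UL << 47 and a 5-level TASK_SIZE of
(1UL << 56) - 4096; both are assumptions for illustration):

	#include <stdbool.h>
	#include <stdio.h>

	#define DEFAULT_MAP_WINDOW	(1UL << 47)		/* 4-level border (assumed) */
	#define TASK_SIZE		((1UL << 56) - 4096)	/* 5-level limit (assumed) */

	static bool hint_valid(unsigned long addr, unsigned long len)
	{
		if (TASK_SIZE - len < addr)
			return false;
		/* both ends must fall on the same side of the border */
		return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
	}

	int main(void)
	{
		/* two pages straddling the border: rejected */
		printf("%d\n", hint_valid(DEFAULT_MAP_WINDOW - 4096, 8192));	/* 0 */
		/* entirely below the border: accepted */
		printf("%d\n", hint_valid(DEFAULT_MAP_WINDOW - 8192, 8192));	/* 1 */
		return 0;
	}
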
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 970e1242a282..6aefe5370e5b 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -343,6 +343,10 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
 	size_t size = vma->vm_end - vma->vm_start;
 	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
 
+	/* Does it even fit in phys_addr_t? */
+	if (offset >> PAGE_SHIFT != vma->vm_pgoff)
+		return -EINVAL;
+
 	/* It's illegal to wrap around the end of the physical address space. */
 	if (offset + (phys_addr_t)size - 1 < offset)
 		return -EINVAL;
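
The new check catches vm_pgoff values whose byte offset no longer fits in
phys_addr_t. A stand-alone sketch of the round-trip test, with a deliberately
narrow 32-bit phys_addr_t so the truncation is visible:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12
	typedef uint32_t phys_addr_t;	/* deliberately narrow for the demo */

	int main(void)
	{
		unsigned long pgoff = 1UL << 25;	/* byte offset 1UL << 37 */
		phys_addr_t offset = (phys_addr_t)pgoff << PAGE_SHIFT;

		/* shifting back does not recover pgoff: the offset truncated */
		if (offset >> PAGE_SHIFT != pgoff)
			printf("pgoff %#lx does not fit in phys_addr_t\n", pgoff);
		return 0;
	}
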
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index c32a77fc8b03..99ae5e30eabe 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -71,15 +71,6 @@ config XEN_PCIDEV_FRONTEND
           The PCI device frontend driver allows the kernel to import arbitrary
           PCI devices from a PCI backend to support PCI driver domains.
 
-config HT_IRQ
-	bool "Interrupts on hypertransport devices"
-	default y
-	depends on PCI && X86_LOCAL_APIC
-	help
-	   This allows native hypertransport devices to use interrupts.
-
-	   If unsure say Y.
-
 config PCI_ATS
 	bool
 
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 80adbdbcecce..ab0104e0ffac 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -24,9 +24,6 @@ endif
 # Build the PCI MSI interrupt support
 obj-$(CONFIG_PCI_MSI) += msi.o
 
-# Build the Hypertransport interrupt support
-obj-$(CONFIG_HT_IRQ) += htirq.o
-
 obj-$(CONFIG_PCI_ATS) += ats.o
 obj-$(CONFIG_PCI_IOV) += iov.o
 
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
deleted file mode 100644
index bb88c26f5144..000000000000
--- a/drivers/pci/htirq.c
+++ /dev/null
@@ -1,135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * File:	htirq.c
- * Purpose:	Hypertransport Interrupt Capability
- *
- * Copyright (C) 2006 Linux Networx
- * Copyright (C) Eric Biederman <ebiederman@lnxi.com>
- */
-
-#include <linux/irq.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/htirq.h>
-
-/* Global ht irq lock.
- *
- * This is needed to serialize access to the data port in hypertransport
- * irq capability.
- *
- * With multiple simultaneous hypertransport irq devices it might pay
- * to make this more fine grained.  But start with simple, stupid, and correct.
- */
-static DEFINE_SPINLOCK(ht_irq_lock);
-
-void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
-{
-	struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&ht_irq_lock, flags);
-	if (cfg->msg.address_lo != msg->address_lo) {
-		pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx);
-		pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_lo);
-	}
-	if (cfg->msg.address_hi != msg->address_hi) {
-		pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1);
-		pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_hi);
-	}
-	if (cfg->update)
-		cfg->update(cfg->dev, irq, msg);
-	spin_unlock_irqrestore(&ht_irq_lock, flags);
-	cfg->msg = *msg;
-}
-
-void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
-{
-	struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
-
-	*msg = cfg->msg;
-}
-
-void mask_ht_irq(struct irq_data *data)
-{
-	struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
-	struct ht_irq_msg msg = cfg->msg;
-
-	msg.address_lo |= 1;
-	write_ht_irq_msg(data->irq, &msg);
-}
-
-void unmask_ht_irq(struct irq_data *data)
-{
-	struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
-	struct ht_irq_msg msg = cfg->msg;
-
-	msg.address_lo &= ~1;
-	write_ht_irq_msg(data->irq, &msg);
-}
-
-/**
- * __ht_create_irq - create an irq and attach it to a device.
- * @dev: The hypertransport device to find the irq capability on.
- * @idx: Which of the possible irqs to attach to.
- * @update: Function to be called when changing the htirq message
- *
- * The irq number of the new irq or a negative error value is returned.
- */
-int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
-{
-	int max_irq, pos, irq;
-	unsigned long flags;
-	u32 data;
-
-	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
-	if (!pos)
-		return -EINVAL;
-
-	/* Verify the idx I want to use is in range */
-	spin_lock_irqsave(&ht_irq_lock, flags);
-	pci_write_config_byte(dev, pos + 2, 1);
-	pci_read_config_dword(dev, pos + 4, &data);
-	spin_unlock_irqrestore(&ht_irq_lock, flags);
-
-	max_irq = (data >> 16) & 0xff;
-	if (idx > max_irq)
-		return -EINVAL;
-
-	irq = arch_setup_ht_irq(idx, pos, dev, update);
-	if (irq > 0)
-		dev_dbg(&dev->dev, "irq %d for HT\n", irq);
-
-	return irq;
-}
-EXPORT_SYMBOL(__ht_create_irq);
-
-/**
- * ht_create_irq - create an irq and attach it to a device.
- * @dev: The hypertransport device to find the irq capability on.
- * @idx: Which of the possible irqs to attach to.
- *
- * ht_create_irq needs to be called for all hypertransport devices
- * that generate irqs.
- *
- * The irq number of the new irq or a negative error value is returned.
- */
-int ht_create_irq(struct pci_dev *dev, int idx)
-{
-	return __ht_create_irq(dev, idx, NULL);
-}
-EXPORT_SYMBOL(ht_create_irq);
-
-/**
- * ht_destroy_irq - destroy an irq created with ht_create_irq
- * @irq: irq to be destroyed
- *
- * This reverses ht_create_irq removing the specified irq from
- * existence.  The irq should be free before this happens.
- */
-void ht_destroy_irq(unsigned int irq)
-{
-	arch_teardown_ht_irq(irq);
-}
-EXPORT_SYMBOL(ht_destroy_irq);
diff --git a/include/linux/htirq.h b/include/linux/htirq.h
deleted file mode 100644
index 127c39d815ba..000000000000
--- a/include/linux/htirq.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_HTIRQ_H
-#define LINUX_HTIRQ_H
-
-struct pci_dev;
-struct irq_data;
-
-struct ht_irq_msg {
-	u32	address_lo;	/* low 32 bits of the ht irq message */
-	u32	address_hi;	/* high 32 bits of the it irq message */
-};
-
-typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq,
-			       struct ht_irq_msg *msg);
-
-struct ht_irq_cfg {
-	struct pci_dev *dev;
-	 /* Update callback used to cope with buggy hardware */
-	ht_irq_update_t *update;
-	unsigned pos;
-	unsigned idx;
-	struct ht_irq_msg msg;
-};
-
-/* Helper functions.. */
-void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
-void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
-void mask_ht_irq(struct irq_data *data);
-void unmask_ht_irq(struct irq_data *data);
-
-/* The arch hook for getting things started */
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
-		      ht_irq_update_t *update);
-void arch_teardown_ht_irq(unsigned int irq);
-
-/* For drivers of buggy hardware */
-int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update);
-
-#endif /* LINUX_HTIRQ_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d16a7c037ec0..16287684dfe8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1484,12 +1484,6 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
 static inline void pcie_ecrc_get_policy(char *str) { }
 #endif
 
-#ifdef CONFIG_HT_IRQ
-/* The functions a driver should call */
-int  ht_create_irq(struct pci_dev *dev, int idx);
-void ht_destroy_irq(unsigned int irq);
-#endif /* CONFIG_HT_IRQ */
-
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
 void pci_ats_init(struct pci_dev *dev);
diff --git a/tools/testing/selftests/x86/5lvl.c b/tools/testing/selftests/x86/5lvl.c
new file mode 100644
index 000000000000..2eafdcd4c2b3
--- /dev/null
+++ b/tools/testing/selftests/x86/5lvl.c
@@ -0,0 +1,177 @@
+#include <stdio.h>
+#include <sys/mman.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define PAGE_SIZE	4096
+#define LOW_ADDR	((void *) (1UL << 30))
+#define HIGH_ADDR	((void *) (1UL << 50))
+
+struct testcase {
+	void *addr;
+	unsigned long size;
+	unsigned long flags;
+	const char *msg;
+	unsigned int low_addr_required:1;
+	unsigned int keep_mapped:1;
+};
+
+static struct testcase testcases[] = {
+	{
+		.addr = NULL,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(NULL)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = LOW_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(LOW_ADDR)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR) again",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(HIGH_ADDR, MAP_FIXED)",
+	},
+	{
+		.addr = (void*) -1,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void*) -1,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1) again",
+	},
+	{
+		.addr = (void *)((1UL << 47) - PAGE_SIZE),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap((1UL << 47), 2 * PAGE_SIZE)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)((1UL << 47) - PAGE_SIZE / 2),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap((1UL << 47), 2 * PAGE_SIZE / 2)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)((1UL << 47) - PAGE_SIZE),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)",
+	},
+	{
+		.addr = NULL,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(NULL, MAP_HUGETLB)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = LOW_ADDR,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
+	},
+	{
+		.addr = (void*) -1,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1, MAP_HUGETLB)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void*) -1,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1, MAP_HUGETLB) again",
+	},
+	{
+		.addr = (void *)((1UL << 47) - PAGE_SIZE),
+		.size = 4UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap((1UL << 47), 4UL << 20, MAP_HUGETLB)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)((1UL << 47) - (2UL << 20)),
+		.size = 4UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)",
+	},
+};
+
+int main(int argc, char **argv)
+{
+	int i;
+	void *p;
+
+	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+		struct testcase *t = testcases + i;
+
+		p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0);
+
+		printf("%s: %p - ", t->msg, p);
+
+		if (p == MAP_FAILED) {
+			printf("FAILED\n");
+			continue;
+		}
+
+		if (t->low_addr_required && p >= (void *)(1UL << 47))
+			printf("FAILED\n");
+		else
+			printf("OK\n");
+		if (!t->keep_mapped)
+			munmap(p, t->size);
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 7b1adeee4b0f..939a337128db 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,7 +11,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
index 3f0093911f03..d1b61ab870f8 100644
--- a/tools/testing/selftests/x86/mpx-hw.h
+++ b/tools/testing/selftests/x86/mpx-hw.h
@@ -52,14 +52,14 @@
 struct mpx_bd_entry {
 	union {
 		char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
-		void *contents[1];
+		void *contents[0];
 	};
 } __attribute__((packed));
 
 struct mpx_bt_entry {
 	union {
 		char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
-		unsigned long contents[1];
+		unsigned long contents[0];
 	};
 } __attribute__((packed));
 
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
index 3818f25391c2..b3cb7670e026 100644
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -30,6 +30,7 @@ static inline void sigsafe_printf(const char *format, ...)
 	if (!dprint_in_signal) {
 		vprintf(format, ap);
 	} else {
+		int ret;
 		int len = vsnprintf(dprint_in_signal_buffer,
 				    DPRINT_IN_SIGNAL_BUF_SIZE,
 				    format, ap);
@@ -39,7 +40,9 @@ static inline void sigsafe_printf(const char *format, ...)
 		 */
 		if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
 			len = DPRINT_IN_SIGNAL_BUF_SIZE;
-		write(1, dprint_in_signal_buffer, len);
+		ret = write(1, dprint_in_signal_buffer, len);
+		if (ret < 0)
+			abort();
 	}
 	va_end(ap);
 }
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 7a1cc0e56d2d..bc1b0735bb50 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -250,7 +250,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	unsigned long ip;
 	char *fpregs;
 	u32 *pkru_ptr;
-	u64 si_pkey;
+	u64 siginfo_pkey;
 	u32 *si_pkey_ptr;
 	int pkru_offset;
 	fpregset_t fpregset;
@@ -292,9 +292,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
 	dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
 	dump_mem(si_pkey_ptr - 8, 24);
-	si_pkey = *si_pkey_ptr;
-	pkey_assert(si_pkey < NR_PKEYS);
-	last_si_pkey = si_pkey;
+	siginfo_pkey = *si_pkey_ptr;
+	pkey_assert(siginfo_pkey < NR_PKEYS);
+	last_si_pkey = siginfo_pkey;
 
 	if ((si->si_code == SEGV_MAPERR) ||
 	    (si->si_code == SEGV_ACCERR) ||
@@ -306,7 +306,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
 	/* need __rdpkru() version so we do not do shadow_pkru checking */
 	dprintf1("signal pkru from  pkru: %08x\n", __rdpkru());
-	dprintf1("si_pkey from siginfo: %jx\n", si_pkey);
+	dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
 	*(u64 *)pkru_ptr = 0x00000000;
 	dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
 	pkru_faults++;

* [GIT PULL] x86 fixes
@ 2017-11-05 14:46 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-11-05 14:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Borislav Petkov,
	Peter Zijlstra, Andrew Morton, Josh Poimboeuf

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: eda9cec4c9a12208a6f69fbe68f72a6311d50032 x86/module: Detect and skip invalid relocations

Two fixes:

 - A PCID-related revert that fixes power management and performance regressions.

 - The module loader robustification and sanity-check commit is rather fresh, but
   it looked like a good idea to apply because of the hidden data corruption
   problem such invalid modules could cause.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  arch/ia64/include/asm/acpi.h       # 675357362aeb: Revert "x86/mm: Stop calling
  drivers/acpi/processor_idle.c      # 675357362aeb: Revert "x86/mm: Stop calling
  drivers/idle/intel_idle.c          # 675357362aeb: Revert "x86/mm: Stop calling

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      Revert "x86/mm: Stop calling leave_mm() in idle code"

Josh Poimboeuf (1):
      x86/module: Detect and skip invalid relocations


 arch/ia64/include/asm/acpi.h  |  2 ++
 arch/x86/include/asm/acpi.h   |  2 ++
 arch/x86/kernel/module.c      | 13 +++++++++++++
 arch/x86/mm/tlb.c             | 17 ++++++++++++++---
 drivers/acpi/processor_idle.c |  2 ++
 drivers/idle/intel_idle.c     |  9 +++++----
 6 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index c86a947f5368..a3d0211970e9 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -112,6 +112,8 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
 	buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
 }
 
+#define acpi_unlazy_tlb(x)
+
 #ifdef CONFIG_ACPI_NUMA
 extern cpumask_t early_cpu_possible_map;
 #define for_each_possible_early_cpu(cpu)  \
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 72d867f6b518..8d0ec9df1cbe 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -150,6 +150,8 @@ static inline void disable_acpi(void) { }
 extern int x86_acpi_numa_init(void);
 #endif /* CONFIG_ACPI_NUMA */
 
+#define acpi_unlazy_tlb(x)	leave_mm(x)
+
 #ifdef CONFIG_ACPI_APEI
 static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
 {
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 62e7d70aadd5..da0c160e5589 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -172,19 +172,27 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_X86_64_NONE:
 			break;
 		case R_X86_64_64:
+			if (*(u64 *)loc != 0)
+				goto invalid_relocation;
 			*(u64 *)loc = val;
 			break;
 		case R_X86_64_32:
+			if (*(u32 *)loc != 0)
+				goto invalid_relocation;
 			*(u32 *)loc = val;
 			if (val != *(u32 *)loc)
 				goto overflow;
 			break;
 		case R_X86_64_32S:
+			if (*(s32 *)loc != 0)
+				goto invalid_relocation;
 			*(s32 *)loc = val;
 			if ((s64)val != *(s32 *)loc)
 				goto overflow;
 			break;
 		case R_X86_64_PC32:
+			if (*(u32 *)loc != 0)
+				goto invalid_relocation;
 			val -= (u64)loc;
 			*(u32 *)loc = val;
 #if 0
@@ -200,6 +208,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	}
 	return 0;
 
+invalid_relocation:
+	pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
+	       (int)ELF64_R_TYPE(rel[i].r_info), loc, val);
+	return -ENOEXEC;
+
 overflow:
 	pr_err("overflow in relocation type %d val %Lx\n",
 	       (int)ELF64_R_TYPE(rel[i].r_info), val);
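
The idea behind the new checks: a relocation target that is already nonzero has
either been applied once before or the module image is corrupt, and writing it
again corrupts data. A stand-alone sketch of the pattern (not the kernel's
apply_relocate_add()):

	#include <stdint.h>
	#include <stdio.h>

	/* apply a 64-bit absolute relocation only if the target is still zero */
	static int apply_rel64(uint64_t *loc, uint64_t val)
	{
		if (*loc != 0)
			return -1;	/* invalid relocation target */
		*loc = val;
		return 0;
	}

	int main(void)
	{
		uint64_t slot = 0;

		printf("first apply:  %d\n", apply_rel64(&slot, 0xdeadbeef));	/* 0 */
		printf("second apply: %d\n", apply_rel64(&slot, 0xdeadbeef));	/* -1 */
		return 0;
	}
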
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0f3d0cea4d00..3118392cdf75 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -85,6 +85,7 @@ void leave_mm(int cpu)
 
 	switch_mm(NULL, &init_mm, NULL);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	       struct task_struct *tsk)
@@ -195,12 +196,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
 			write_cr3(build_cr3(next, new_asid));
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
-					TLB_FLUSH_ALL);
+
+			/*
+			 * NB: This gets called via leave_mm() in the idle path
+			 * where RCU functions differently.  Tracing normally
+			 * uses RCU, so we need to use the _rcuidle variant.
+			 *
+			 * (There is no good reason for this.  The idle code should
+			 *  be rearranged to call this before rcu_idle_enter().)
+			 */
+			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 		} else {
 			/* The new ASID is already up to date. */
 			write_cr3(build_cr3_noflush(next, new_asid));
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
+
+			/* See above wrt _rcuidle. */
+			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
 		}
 
 		this_cpu_write(cpu_tlbstate.loaded_mm, next);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 2736e25e9dc6..d50a7b6ccddd 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -710,6 +710,8 @@ static DEFINE_RAW_SPINLOCK(c3_lock);
 static void acpi_idle_enter_bm(struct acpi_processor *pr,
 			       struct acpi_processor_cx *cx, bool timer_bc)
 {
+	acpi_unlazy_tlb(smp_processor_id());
+
 	/*
 	 * Must be done before busmaster disable as we might need to
 	 * access HPET !
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 5dc7ea4b6bc4..f0b06b14e782 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -913,15 +913,16 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
 	struct cpuidle_state *state = &drv->states[index];
 	unsigned long eax = flg2MWAIT(state->flags);
 	unsigned int cstate;
+	int cpu = smp_processor_id();
 
 	cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
 
 	/*
-	 * NB: if CPUIDLE_FLAG_TLB_FLUSHED is set, this idle transition
-	 * will probably flush the TLB.  It's not guaranteed to flush
-	 * the TLB, though, so it's not clear that we can do anything
-	 * useful with this knowledge.
+	 * leave_mm() to avoid costly and often unnecessary wakeups
+	 * for flushing the user TLB's associated with the active mm.
 	 */
+	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
+		leave_mm(cpu);
 
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
 		tick_broadcast_enter();

* [GIT PULL] x86 fixes
@ 2017-10-27 19:24 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-10-27 19:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 90edaac62729d3b9cbb97756261a0049a7fdd6a0 Revert "x86/mm: Limit mmap() of /dev/mem to valid physical addresses"

Misc fixes:

 - revert a /dev/mem restriction change that caused crashes with certain boot parameters
 - an AMD erratum fix for cases where the BIOS doesn't apply it
 - fix unwinder debuginfo
 - improve ORC unwinder warning printouts

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't

Ingo Molnar (1):
      Revert "x86/mm: Limit mmap() of /dev/mem to valid physical addresses"

Josh Poimboeuf (2):
      x86/entry: Fix idtentry unwind hint
      x86/unwind: Show function name+offset in ORC error messages


 arch/x86/entry/entry_64.S    |  2 +-
 arch/x86/include/asm/io.h    |  4 ----
 arch/x86/kernel/amd_nb.c     | 41 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/unwind_orc.c | 29 +++++++++++++++--------------
 arch/x86/mm/mmap.c           | 12 ------------
 5 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 49167258d587..f6cdb7a1455e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -808,7 +808,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
-	UNWIND_HINT_IRET_REGS offset=8
+	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
 	/* Sanity check */
 	.if \shift_ist != -1 && \paranoid == 0
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 322d25ae23ab..c40a95c33bb8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -110,10 +110,6 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
 
 #endif
 
-#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
-extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
-extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
-
 /**
  *	virt_to_phys	-	map virtual addresses to physical
  *	@address: address to remap
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 458da8509b75..6db28f17ff28 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = {
 	{}
 };
 
+#define PCI_DEVICE_ID_AMD_CNB17H_F4     0x1704
+
 const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{}
 };
 EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
 
@@ -402,11 +406,48 @@ void amd_flush_garts(void)
 }
 EXPORT_SYMBOL_GPL(amd_flush_garts);
 
+static void __fix_erratum_688(void *info)
+{
+#define MSR_AMD64_IC_CFG 0xC0011021
+
+	msr_set_bit(MSR_AMD64_IC_CFG, 3);
+	msr_set_bit(MSR_AMD64_IC_CFG, 14);
+}
+
+/* Apply erratum 688 fix so machines without a BIOS fix work. */
+static __init void fix_erratum_688(void)
+{
+	struct pci_dev *F4;
+	u32 val;
+
+	if (boot_cpu_data.x86 != 0x14)
+		return;
+
+	if (!amd_northbridges.num)
+		return;
+
+	F4 = node_to_amd_nb(0)->link;
+	if (!F4)
+		return;
+
+	if (pci_read_config_dword(F4, 0x164, &val))
+		return;
+
+	if (val & BIT(2))
+		return;
+
+	on_each_cpu(__fix_erratum_688, NULL, 0);
+
+	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");
+}
+
 static __init int init_amd_nbs(void)
 {
 	amd_cache_northbridges();
 	amd_cache_gart();
 
+	fix_erratum_688();
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 570b70d3f604..b95007e7c1b3 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -86,8 +86,8 @@ static struct orc_entry *orc_find(unsigned long ip)
 		idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
 
 		if (unlikely((idx >= lookup_num_blocks-1))) {
-			orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
-				 idx, lookup_num_blocks, ip);
+			orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
+				 idx, lookup_num_blocks, (void *)ip);
 			return NULL;
 		}
 
@@ -96,8 +96,8 @@ static struct orc_entry *orc_find(unsigned long ip)
 
 		if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
 			     (__start_orc_unwind + stop > __stop_orc_unwind))) {
-			orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
-				 idx, lookup_num_blocks, start, stop, ip);
+			orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
+				 idx, lookup_num_blocks, start, stop, (void *)ip);
 			return NULL;
 		}
 
@@ -373,7 +373,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_R10:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg R10 at ip %p\n",
+			orc_warn("missing regs for base reg R10 at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -382,7 +382,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_R13:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg R13 at ip %p\n",
+			orc_warn("missing regs for base reg R13 at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -391,7 +391,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_DI:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg DI at ip %p\n",
+			orc_warn("missing regs for base reg DI at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -400,7 +400,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_DX:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg DX at ip %p\n",
+			orc_warn("missing regs for base reg DX at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -408,7 +408,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown SP base reg %d for ip %p\n",
+		orc_warn("unknown SP base reg %d for ip %pB\n",
 			 orc->sp_reg, (void *)state->ip);
 		goto done;
 	}
@@ -436,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_TYPE_REGS:
 		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
-			orc_warn("can't dereference registers at %p for ip %p\n",
+			orc_warn("can't dereference registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
 			goto done;
 		}
@@ -448,7 +448,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_TYPE_REGS_IRET:
 		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
-			orc_warn("can't dereference iret registers at %p for ip %p\n",
+			orc_warn("can't dereference iret registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
 			goto done;
 		}
@@ -465,7 +465,8 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
+		orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+			 orc->type, (void *)orig_ip);
 		break;
 	}
 
@@ -487,7 +488,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown BP base reg %d for ip %p\n",
+		orc_warn("unknown BP base reg %d for ip %pB\n",
 			 orc->bp_reg, (void *)orig_ip);
 		goto done;
 	}
@@ -496,7 +497,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->stack_info.type == prev_type &&
 	    on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
 	    state->sp <= prev_sp) {
-		orc_warn("stack going in the wrong direction? ip=%p\n",
+		orc_warn("stack going in the wrong direction? ip=%pB\n",
 			 (void *)orig_ip);
 		goto done;
 	}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 320c6237e1d1..a99679826846 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -174,15 +174,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 		return "[mpx]";
 	return NULL;
 }
-
-int valid_phys_addr_range(phys_addr_t addr, size_t count)
-{
-	return addr + count <= __pa(high_memory);
-}
-
-int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
-{
-	phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
-
-	return valid_phys_addr_range(addr, count);
-}

* [GIT PULL] x86 fixes
@ 2017-10-14 16:16 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-10-14 16:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 1f161f67a272cc4f29f27934dd3f74cb657eb5c4 x86/microcode: Do the family check first

A laundry list of fixes:

 - fix reboot breakage on some PCID-enabled systems
 - fix crashes/hangs on some PCID-enabled systems
 - fix microcode loading on certain older CPUs
 - various unwinder fixes
 - extend an APIC quirk to more hardware systems and disable an APIC-related
   warning on virtualized systems
 - various Hyper-V fixes
 - a macro definition robustness fix
 - remove jprobes IRQ disabling
 - various mem-encryption fixes

 Thanks,

	Ingo

------------------>
Andy Lutomirski (2):
      x86/mm/64: Fix reboot interaction with CR4.PCIDE
      x86/mm: Flush more aggressively in lazy TLB mode

Borislav Petkov (1):
      x86/microcode: Do the family check first

Josh Poimboeuf (5):
      kprobes/x86: Set up frame pointer in kprobe trampoline
      x86/unwind: Fix dereference of untrusted pointer
      x86/unwind: Use MSB for frame pointer encoding on 32-bit
      x86/unwind: Align stack pointer in unwinder dump
      x86/unwind: Disable unwinder warnings on 32-bit

Len Brown (1):
      x86/apic: Update TSC_DEADLINE quirk with additional SKX stepping

Marcelo Henrique Cerri (1):
      x86/hyperv: Fix hypercalls with extended CPU ranges for TLB flushing

Masami Hiramatsu (1):
      kprobes/x86: Remove IRQ disabling from jprobe handlers

Mathias Krause (1):
      x86/alternatives: Fix alt_max_short macro to really be a max()

Paolo Bonzini (1):
      x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors

Tom Lendacky (1):
      x86/mm: Disable various instrumentations of mm/mem_encrypt.c and mm/tlb.c

Vitaly Kuznetsov (2):
      x86/hyperv: Clear vCPU banks between calls to avoid flushing unneeded vCPUs
      x86/hyperv: Don't use percpu areas for pcpu_flush/pcpu_flush_ex structures


 arch/x86/entry/entry_32.S              |   4 +-
 arch/x86/hyperv/hv_init.c              |   5 ++
 arch/x86/hyperv/mmu.c                  |  57 +++++++++---
 arch/x86/include/asm/alternative-asm.h |   4 +-
 arch/x86/include/asm/alternative.h     |   6 +-
 arch/x86/include/asm/mmu_context.h     |   8 +-
 arch/x86/include/asm/mshyperv.h        |   1 +
 arch/x86/include/asm/tlbflush.h        |  24 ++++++
 arch/x86/kernel/apic/apic.c            |  15 +++-
 arch/x86/kernel/cpu/microcode/core.c   |  27 ++++--
 arch/x86/kernel/kprobes/common.h       |  13 ++-
 arch/x86/kernel/kprobes/core.c         |   2 -
 arch/x86/kernel/reboot.c               |   4 +
 arch/x86/kernel/unwind_frame.c         |  38 +++++++-
 arch/x86/mm/Makefile                   |  11 ++-
 arch/x86/mm/tlb.c                      | 153 ++++++++++++++++++++++++---------
 16 files changed, 284 insertions(+), 88 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 8a13d468635a..50e0d2bc4528 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -176,7 +176,7 @@
 /*
  * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
  * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
+ * is just clearing the MSB, which makes it an invalid stack address and is also
  * a signal to the unwinder that it's a pt_regs pointer in disguise.
  *
  * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
@@ -185,7 +185,7 @@
 .macro ENCODE_FRAME_POINTER
 #ifdef CONFIG_FRAME_POINTER
 	mov %esp, %ebp
-	orl $0x1, %ebp
+	andl $0x7fffffff, %ebp
 #endif
 .endm
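
A sketch of the encoding trick: 32-bit kernel stack addresses have the MSB set,
so clearing it produces a value that cannot be a real frame pointer, which the
unwinder can detect and decode back into a pt_regs pointer (illustrative
user-space C, not the kernel macros):

	#include <stdint.h>
	#include <stdio.h>

	/* ENCODE_FRAME_POINTER equivalent: clear the MSB */
	static uint32_t encode_regs(uint32_t regs)
	{
		return regs & 0x7fffffff;
	}

	/* unwinder side: a cleared MSB marks an encoded pt_regs pointer */
	static int is_encoded(uint32_t bp)
	{
		return !(bp & 0x80000000);
	}

	int main(void)
	{
		uint32_t regs = 0xc1234560;	/* assumed kernel stack address */
		uint32_t bp = encode_regs(regs);

		if (is_encoded(bp))
			printf("pt_regs at %#x\n", bp | 0x80000000);	/* MSB restored */
		return 0;
	}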
 
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 1a8eb550c40f..a5db63f728a2 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
 u32 *hv_vp_index;
 EXPORT_SYMBOL_GPL(hv_vp_index);
 
+u32 hv_max_vp_index;
+
 static int hv_cpu_init(unsigned int cpu)
 {
 	u64 msr_vp_index;
@@ -93,6 +95,9 @@ static int hv_cpu_init(unsigned int cpu)
 
 	hv_vp_index[smp_processor_id()] = msr_vp_index;
 
+	if (msr_vp_index > hv_max_vp_index)
+		hv_max_vp_index = msr_vp_index;
+
 	return 0;
 }
 
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 39e7f6e50919..9cc9e1c1e2db 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -36,9 +36,9 @@ struct hv_flush_pcpu_ex {
 /* Each gva in gva_list encodes up to 4096 pages to flush */
 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
 
-static struct hv_flush_pcpu __percpu *pcpu_flush;
+static struct hv_flush_pcpu __percpu **pcpu_flush;
 
-static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
+static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;
 
 /*
  * Fills in gva_list starting from offset. Returns the number of items added.
@@ -76,6 +76,18 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
 {
 	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
 
+	/* valid_bank_mask can represent up to 64 banks */
+	if (hv_max_vp_index / 64 >= 64)
+		return 0;
+
+	/*
+	 * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+	 * structs are not cleared between calls, we risk flushing unneeded
+	 * vCPUs otherwise.
+	 */
+	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+		flush->hv_vp_set.bank_contents[vcpu_bank] = 0;
+
 	/*
 	 * Some banks may end up being empty but this is acceptable.
 	 */
@@ -83,11 +95,6 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
 		vcpu = hv_cpu_number_to_vp_number(cpu);
 		vcpu_bank = vcpu / 64;
 		vcpu_offset = vcpu % 64;
-
-		/* valid_bank_mask can represent up to 64 banks */
-		if (vcpu_bank >= 64)
-			return 0;
-
 		__set_bit(vcpu_offset, (unsigned long *)
 			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
 		if (vcpu_bank >= nr_bank)
@@ -102,6 +109,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 				    const struct flush_tlb_info *info)
 {
 	int cpu, vcpu, gva_n, max_gvas;
+	struct hv_flush_pcpu **flush_pcpu;
 	struct hv_flush_pcpu *flush;
 	u64 status = U64_MAX;
 	unsigned long flags;
@@ -116,7 +124,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 
 	local_irq_save(flags);
 
-	flush = this_cpu_ptr(pcpu_flush);
+	flush_pcpu = this_cpu_ptr(pcpu_flush);
+
+	if (unlikely(!*flush_pcpu))
+		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+	flush = *flush_pcpu;
+
+	if (unlikely(!flush)) {
+		local_irq_restore(flags);
+		goto do_native;
+	}
 
 	if (info->mm) {
 		flush->address_space = virt_to_phys(info->mm->pgd);
@@ -173,6 +191,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 				       const struct flush_tlb_info *info)
 {
 	int nr_bank = 0, max_gvas, gva_n;
+	struct hv_flush_pcpu_ex **flush_pcpu;
 	struct hv_flush_pcpu_ex *flush;
 	u64 status = U64_MAX;
 	unsigned long flags;
@@ -187,7 +206,17 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 
 	local_irq_save(flags);
 
-	flush = this_cpu_ptr(pcpu_flush_ex);
+	flush_pcpu = this_cpu_ptr(pcpu_flush_ex);
+
+	if (unlikely(!*flush_pcpu))
+		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+	flush = *flush_pcpu;
+
+	if (unlikely(!flush)) {
+		local_irq_restore(flags);
+		goto do_native;
+	}
 
 	if (info->mm) {
 		flush->address_space = virt_to_phys(info->mm->pgd);
@@ -222,18 +251,18 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
-			0, nr_bank + 2, flush, NULL);
+			0, nr_bank, flush, NULL);
 	} else if (info->end &&
 		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
-			0, nr_bank + 2, flush, NULL);
+			0, nr_bank, flush, NULL);
 	} else {
 		gva_n = fill_gva_list(flush->gva_list, nr_bank,
 				      info->start, info->end);
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
-			gva_n, nr_bank + 2, flush, NULL);
+			gva_n, nr_bank, flush, NULL);
 	}
 
 	local_irq_restore(flags);
@@ -266,7 +295,7 @@ void hyper_alloc_mmu(void)
 		return;
 
 	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
-		pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+		pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
 	else
-		pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+		pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
 }
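
The sparse VP set used above packs vCPU numbers into 64-bit banks (vcpu / 64
selects the bank, vcpu % 64 the bit), which is also why stale bank contents
from a previous call must be cleared. A stand-alone sketch of the encoding:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		uint64_t bank_contents[64];
		int vcpus[] = { 3, 64, 130 };
		unsigned int i;

		/* stale bits here would flush unneeded vCPUs */
		memset(bank_contents, 0, sizeof(bank_contents));

		for (i = 0; i < sizeof(vcpus) / sizeof(vcpus[0]); i++) {
			int bank = vcpus[i] / 64;	/* which 64-bit word */
			int offset = vcpus[i] % 64;	/* bit within the word */

			bank_contents[bank] |= 1ULL << offset;
		}

		printf("bank0=%#llx bank1=%#llx bank2=%#llx\n",
		       (unsigned long long)bank_contents[0],
		       (unsigned long long)bank_contents[1],
		       (unsigned long long)bank_contents[2]);
		return 0;
	}
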
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index e7636bac7372..6c98821fef5e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -62,8 +62,10 @@
 #define new_len2		145f-144f
 
 /*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
  * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
  */
 #define alt_max_short(a, b)	((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
 
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index c096624137ae..ccbe24e697c4 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	alt_end_marker ":\n"
 
 /*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
  * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
  *
- * The additional "-" is needed because gas works with s32s.
+ * The additional "-" is needed because gas uses a "true" value of -1.
  */
-#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
 
 /*
  * Pad the second replacement alternative with additional NOPs if it is
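
The bithack is easy to verify in isolation: -(a < b) is all ones when a < b,
so a ^ ((a ^ b) & mask) selects b; otherwise the mask is zero and a is kept.
A C sketch of the identity (the macro above is the gas string form of the same
expression):

	#include <stdio.h>

	/* branchless max: a ^ ((a ^ b) & -(a < b)) */
	static long alt_max(long a, long b)
	{
		return a ^ ((a ^ b) & -(long)(a < b));
	}

	int main(void)
	{
		printf("%ld %ld %ld\n",
		       alt_max(3, 7), alt_max(7, 3), alt_max(5, 5));	/* 7 7 5 */
		return 0;
	}
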
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c120b5db178a..3c856a15b98e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 	DEBUG_LOCKS_WARN_ON(preemptible());
 }
 
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-	int cpu = smp_processor_id();
-
-	if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
-		cpumask_clear_cpu(cpu, mm_cpumask(mm));
-}
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 738503e1f80c..530f448fddaf 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -289,6 +289,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
  * to this information.
  */
 extern u32 *hv_vp_index;
+extern u32 hv_max_vp_index;
 
 /**
  * hv_cpu_number_to_vp_number() - Map CPU to VP.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4893abf7f74f..d362161d3291 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -83,6 +83,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 #endif
 
 /*
+ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
+ * to init_mm when we switch to a kernel thread (e.g. the idle thread).  If
+ * it's false, then we immediately switch CR3 when entering a kernel thread.
+ */
+DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
+/*
  * 6 because 6 should be plenty and struct tlb_state will fit in
  * two cache lines.
  */
@@ -105,6 +112,23 @@ struct tlb_state {
 	u16 next_asid;
 
 	/*
+	 * We can be in one of several states:
+	 *
+	 *  - Actively using an mm.  Our CPU's bit will be set in
+	 *    mm_cpumask(loaded_mm) and is_lazy == false;
+	 *
+	 *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
+	 *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+	 *
+	 *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
+	 *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
+	 *    We're heuristically guessing that the CR3 load we
+	 *    skipped more than makes up for the overhead added by
+	 *    lazy mode.
+	 */
+	bool is_lazy;
+
+	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
 	 * disabling interrupts when modifying either one.
 	 */
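
In tabular form, the three states the comment above describes (a summary
derived from the comment, not additional semantics):

  State            loaded_mm   CPU bit in mm_cpumask(loaded_mm)   is_lazy
  ---------------  ----------  ---------------------------------  -------
  active user mm   real mm     set                                false
  kernel-only      &init_mm    clear                              false
  lazy user mm     real mm     still set                          true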
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c769f77d..ff891772c9f8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,11 +573,21 @@ static u32 bdx_deadline_rev(void)
 	return ~0U;
 }
 
+static u32 skx_deadline_rev(void)
+{
+	switch (boot_cpu_data.x86_mask) {
+	case 0x03: return 0x01000136;
+	case 0x04: return 0x02000014;
+	}
+
+	return ~0U;
+}
+
 static const struct x86_cpu_id deadline_match[] = {
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020),
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D,	bdx_deadline_rev),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X,	0x02000014),
+	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev),
 
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE,	0x22),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT,	0x20),
@@ -600,7 +610,8 @@ static void apic_check_deadline_errata(void)
 	const struct x86_cpu_id *m;
 	u32 rev;
 
-	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+	    boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return;
 
 	m = x86_match_cpu(deadline_match);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 86e8f0b2537b..c4fa4a85d4cb 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void)
 	bool *res = &dis_ucode_ldr;
 #endif
 
-	if (!have_cpuid_p())
-		return *res;
-
 	/*
 	 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
 	 * completely accurate as xen pv guests don't see that CPUID bit set but
@@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name)
 void __init load_ucode_bsp(void)
 {
 	unsigned int cpuid_1_eax;
+	bool intel = true;
 
-	if (check_loader_disabled_bsp())
+	if (!have_cpuid_p())
 		return;
 
 	cpuid_1_eax = native_cpuid_eax(1);
 
 	switch (x86_cpuid_vendor()) {
 	case X86_VENDOR_INTEL:
-		if (x86_family(cpuid_1_eax) >= 6)
-			load_ucode_intel_bsp();
+		if (x86_family(cpuid_1_eax) < 6)
+			return;
 		break;
+
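
The widened check can be exercised with a tiny stand-alone decoder (a
hypothetical sketch for these specific byte patterns; objtool's real
decoder is the kernel's instruction decoder):

  #include <stdio.h>
  #include <stdint.h>

  static const char *reg64[16] = {
  	"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  	"r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15"
  };

  /*
   * REX.W + opcode 0x89 with ModRM.mod == 3 and ModRM.reg == 4 (%rsp)
   * encodes "mov %rsp, <reg>"; the destination register is ModRM.rm
   * extended by REX.B, as in op_to_cfi_reg[modrm_rm][rex_b] above.
   */
  static void decode(uint8_t rex, uint8_t modrm)
  {
  	uint8_t mod = modrm >> 6, reg = (modrm >> 3) & 7, rm = modrm & 7;
  	uint8_t rex_w = (rex >> 3) & 1, rex_r = (rex >> 2) & 1;
  	uint8_t rex_b = rex & 1;

  	if (rex_w && !rex_r && mod == 3 && reg == 4)
  		printf("mov %%rsp, %%%s\n", reg64[rm + 8 * rex_b]);
  }

  int main(void)
  {
  	decode(0x48, 0xe5);	/* 48 89 e5: mov %rsp, %rbp */
  	decode(0x48, 0xe0);	/* 48 89 e0: mov %rsp, %rax */
  	decode(0x49, 0xe4);	/* 49 89 e4: mov %rsp, %r12 */
  	return 0;
  }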
 	case X86_VENDOR_AMD:
-		if (x86_family(cpuid_1_eax) >= 0x10)
-			load_ucode_amd_bsp(cpuid_1_eax);
+		if (x86_family(cpuid_1_eax) < 0x10)
+			return;
+		intel = false;
 		break;
+
 	default:
-		break;
+		return;
 	}
+
+	if (check_loader_disabled_bsp())
+		return;
+
+	if (intel)
+		load_ucode_intel_bsp();
+	else
+		load_ucode_amd_bsp(cpuid_1_eax);
 }
 
 static bool check_loader_disabled_ap(void)
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index db2182d63ed0..3fc0f9a794cb 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -3,6 +3,15 @@
 
 /* Kprobes and Optprobes common header */
 
+#include <asm/asm.h>
+
+#ifdef CONFIG_FRAME_POINTER
+# define SAVE_RBP_STRING "	push %" _ASM_BP "\n" \
+			 "	mov  %" _ASM_SP ", %" _ASM_BP "\n"
+#else
+# define SAVE_RBP_STRING "	push %" _ASM_BP "\n"
+#endif
+
 #ifdef CONFIG_X86_64
 #define SAVE_REGS_STRING			\
 	/* Skip cs, ip, orig_ax. */		\
@@ -17,7 +26,7 @@
 	"	pushq %r10\n"			\
 	"	pushq %r11\n"			\
 	"	pushq %rbx\n"			\
-	"	pushq %rbp\n"			\
+	SAVE_RBP_STRING				\
 	"	pushq %r12\n"			\
 	"	pushq %r13\n"			\
 	"	pushq %r14\n"			\
@@ -48,7 +57,7 @@
 	"	pushl %es\n"			\
 	"	pushl %ds\n"			\
 	"	pushl %eax\n"			\
-	"	pushl %ebp\n"			\
+	SAVE_RBP_STRING				\
 	"	pushl %edi\n"			\
 	"	pushl %esi\n"			\
 	"	pushl %edx\n"			\
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f0153714ddac..0742491cbb73 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1080,8 +1080,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 * raw stack chunk with redzones:
 	 */
 	__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
-	regs->flags &= ~X86_EFLAGS_IF;
-	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
 
 	/*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 54180fa6f66f..add33f600531 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -105,6 +105,10 @@ void __noreturn machine_real_restart(unsigned int type)
 	load_cr3(initial_page_table);
 #else
 	write_cr3(real_mode_header->trampoline_pgd);
+
+	/* Exiting long mode will fail if CR4.PCIDE is set. */
+	if (static_cpu_has(X86_FEATURE_PCID))
+		cr4_clear_bits(X86_CR4_PCIDE);
 #endif
 
 	/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index d145a0b1f529..3dc26f95d46e 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -44,7 +44,8 @@ static void unwind_dump(struct unwind_state *state)
 			state->stack_info.type, state->stack_info.next_sp,
 			state->stack_mask, state->graph_idx);
 
-	for (sp = state->orig_sp; sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+	for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp;
+	     sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
 		if (get_stack_info(sp, state->task, &stack_info, &visit_mask))
 			break;
 
@@ -174,6 +175,7 @@ static bool is_last_task_frame(struct unwind_state *state)
  * This determines if the frame pointer actually contains an encoded pointer to
  * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
  */
+#ifdef CONFIG_X86_64
 static struct pt_regs *decode_frame_pointer(unsigned long *bp)
 {
 	unsigned long regs = (unsigned long)bp;
@@ -183,6 +185,23 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
 
 	return (struct pt_regs *)(regs & ~0x1);
 }
+#else
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+	unsigned long regs = (unsigned long)bp;
+
+	if (regs & 0x80000000)
+		return NULL;
+
+	return (struct pt_regs *)(regs | 0x80000000);
+}
+#endif
+
+#ifdef CONFIG_X86_32
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
+#else
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
+#endif
 
 static bool update_stack_state(struct unwind_state *state,
 			       unsigned long *next_bp)
@@ -202,7 +221,7 @@ static bool update_stack_state(struct unwind_state *state,
 	regs = decode_frame_pointer(next_bp);
 	if (regs) {
 		frame = (unsigned long *)regs;
-		len = regs_size(regs);
+		len = KERNEL_REGS_SIZE;
 		state->got_irq = true;
 	} else {
 		frame = next_bp;
@@ -226,6 +245,14 @@ static bool update_stack_state(struct unwind_state *state,
 	    frame < prev_frame_end)
 		return false;
 
+	/*
+	 * On 32-bit with user mode regs, make sure the last two regs are safe
+	 * to access:
+	 */
+	if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
+	    !on_stack(info, frame, len + 2*sizeof(long)))
+		return false;
+
 	/* Move state to the next frame: */
 	if (regs) {
 		state->regs = regs;
@@ -328,6 +355,13 @@ bool unwind_next_frame(struct unwind_state *state)
 	    state->regs->sp < (unsigned long)task_pt_regs(state->task))
 		goto the_end;
 
+	/*
+	 * There are some known frame pointer issues on 32-bit.  Disable
+	 * unwinder warnings on 32-bit until it gets objtool support.
+	 */
+	if (IS_ENABLED(CONFIG_X86_32))
+		goto the_end;
+
 	if (state->regs) {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 72bf8c01c6e3..e1f095884386 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,12 @@
-# Kernel does not boot with instrumentation of tlb.c.
-KCOV_INSTRUMENT_tlb.o	:= n
+# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
+KCOV_INSTRUMENT_tlb.o		:= n
+KCOV_INSTRUMENT_mem_encrypt.o	:= n
+
+KASAN_SANITIZE_mem_encrypt.o	:= n
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_mem_encrypt.o	= -pg
+endif
 
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
 	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 49d9778376d7..658bf0090565 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -30,6 +30,8 @@
 
 atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
 
+DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
 static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
 			    u16 *new_asid, bool *need_flush)
 {
@@ -80,7 +82,7 @@ void leave_mm(int cpu)
 		return;
 
 	/* Warn if we're not lazy. */
-	WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
+	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
 
 	switch_mm(NULL, &init_mm, NULL);
 }
@@ -142,45 +144,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		__flush_tlb_all();
 	}
 #endif
+	this_cpu_write(cpu_tlbstate.is_lazy, false);
 
 	if (real_prev == next) {
 		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
 			  next->context.ctx_id);
 
-		if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
-			/*
-			 * There's nothing to do: we weren't lazy, and we
-			 * aren't changing our mm.  We don't need to flush
-			 * anything, nor do we need to update CR3, CR4, or
-			 * LDTR.
-			 */
-			return;
-		}
-
-		/* Resume remote flushes and then read tlb_gen. */
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
-
-		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
-		    next_tlb_gen) {
-			/*
-			 * Ideally, we'd have a flush_tlb() variant that
-			 * takes the known CR3 value as input.  This would
-			 * be faster on Xen PV and on hypothetical CPUs
-			 * on which INVPCID is fast.
-			 */
-			this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
-				       next_tlb_gen);
-			write_cr3(build_cr3(next, prev_asid));
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
-					TLB_FLUSH_ALL);
-		}
-
 		/*
-		 * We just exited lazy mode, which means that CR4 and/or LDTR
-		 * may be stale.  (Changes to the required CR4 and LDTR states
-		 * are not reflected in tlb_gen.)
+		 * We don't currently support having a real mm loaded without
+		 * our cpu set in mm_cpumask().  We have all the bookkeeping
+		 * in place to figure out whether we would need to flush
+		 * if our cpu were cleared in mm_cpumask(), but we don't
+		 * currently use it.
 		 */
+		if (WARN_ON_ONCE(real_prev != &init_mm &&
+				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
+			cpumask_set_cpu(cpu, mm_cpumask(next));
+
+		return;
 	} else {
 		u16 new_asid;
 		bool need_flush;
@@ -199,10 +180,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		}
 
 		/* Stop remote flushes for the previous mm */
-		if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
-			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
-
-		VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+				real_prev != &init_mm);
+		cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
 
 		/*
 		 * Start remote flushes and then read tlb_gen.
@@ -233,6 +213,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 }
 
 /*
+ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
+ * kernel thread or other context without an mm.  Acceptable implementations
+ * include doing nothing whatsoever, switching to init_mm, or various clever
+ * lazy tricks to try to minimize TLB flushes.
+ *
+ * The scheduler reserves the right to call enter_lazy_tlb() several times
+ * in a row.  It will notify us that we're going back to a real mm by
+ * calling switch_mm_irqs_off().
+ */
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
+		return;
+
+	if (static_branch_unlikely(&tlb_use_lazy_mode)) {
+		/*
+		 * There's a significant optimization that may be possible
+		 * here.  We have accurate enough TLB flush tracking that we
+		 * don't need to maintain coherence of TLB per se when we're
+		 * lazy.  We do, however, need to maintain coherence of
+		 * paging-structure caches.  We could, in principle, leave our
+		 * old mm loaded and only switch to init_mm when
+		 * tlb_remove_page() happens.
+		 */
+		this_cpu_write(cpu_tlbstate.is_lazy, true);
+	} else {
+		switch_mm(NULL, &init_mm, NULL);
+	}
+}
+
+/*
  * Call this when reinitializing a CPU.  It fixes the following potential
  * problems:
  *
@@ -303,16 +314,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 	/* This code cannot presently handle being reentered. */
 	VM_WARN_ON(!irqs_disabled());
 
+	if (unlikely(loaded_mm == &init_mm))
+		return;
+
 	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
 		   loaded_mm->context.ctx_id);
 
-	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
 		/*
-		 * We're in lazy mode -- don't flush.  We can get here on
-		 * remote flushes due to races and on local flushes if a
-		 * kernel thread coincidentally flushes the mm it's lazily
-		 * still using.
+		 * We're in lazy mode.  We need to at least flush our
+		 * paging-structure cache to avoid speculatively reading
+		 * garbage into our TLB.  Since switching to init_mm is barely
+		 * slower than a minimal flush, just switch to init_mm.
 		 */
+		switch_mm_irqs_off(NULL, &init_mm, NULL);
 		return;
 	}
 
@@ -611,3 +626,57 @@ static int __init create_tlb_single_page_flush_ceiling(void)
 	return 0;
 }
 late_initcall(create_tlb_single_page_flush_ceiling);
+
+static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
+				 size_t count, loff_t *ppos)
+{
+	char buf[2];
+
+	buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
+	buf[1] = '\n';
+
+	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t tlblazy_write_file(struct file *file,
+		 const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	bool val;
+
+	if (kstrtobool_from_user(user_buf, count, &val))
+		return -EINVAL;
+
+	if (val)
+		static_branch_enable(&tlb_use_lazy_mode);
+	else
+		static_branch_disable(&tlb_use_lazy_mode);
+
+	return count;
+}
+
+static const struct file_operations fops_tlblazy = {
+	.read = tlblazy_read_file,
+	.write = tlblazy_write_file,
+	.llseek = default_llseek,
+};
+
+static int __init init_tlb_use_lazy_mode(void)
+{
+	if (boot_cpu_has(X86_FEATURE_PCID)) {
+		/*
+		 * Heuristic: with PCID on, switching to and from
+		 * init_mm is reasonably fast, but remote flush IPIs are
+		 * as expensive as ever, so turn off lazy TLB mode.
+		 *
+		 * We can't do this in setup_pcid() because static keys
+		 * haven't been initialized yet, and it would blow up
+		 * badly.
+		 */
+		static_branch_disable(&tlb_use_lazy_mode);
+	}
+
+	debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
+			    arch_debugfs_dir, NULL, &fops_tlblazy);
+	return 0;
+}
+late_initcall(init_tlb_use_lazy_mode);
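
The new knob lands in the arch debugfs directory, so assuming debugfs is
mounted at the usual /sys/kernel/debug, the current mode can be read via
"cat /sys/kernel/debug/x86/tlb_use_lazy_mode" and toggled by writing 0 or
1 to the same file.  Per the heuristic in init_tlb_use_lazy_mode() above,
PCID-capable CPUs boot with lazy TLB mode disabled.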

* [GIT PULL] x86 fixes
@ 2017-09-24 11:28 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-09-24 11:28 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton, Andy Lutomirski

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: f5caf621ee357279e759c0911daf6d55c7d36f03 x86/asm: Fix inline asm call constraints for Clang

Another round of CR3/PCID related fixes (I think this addresses all but one of the 
known problems with PCID support), an objtool fix, plus a Clang fix that (finally) 
resolves the remaining Clang quirks, so that an x86 kernel builds and boots as-is.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  tools/objtool/check.c              # 0d0970eef3b0: objtool: Handle another GCC 

 Thanks,

	Ingo

------------------>
Andy Lutomirski (4):
      x86/mm: Factor out CR3-building code
      x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware code
      x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier
      x86/mm/32: Load a sane CR3 before cpu_init() on secondary CPUs

Josh Poimboeuf (2):
      objtool: Handle another GCC stack pointer adjustment bug
      x86/asm: Fix inline asm call constraints for Clang


 arch/x86/include/asm/alternative.h               |  3 +-
 arch/x86/include/asm/asm.h                       | 11 ++++++
 arch/x86/include/asm/mmu_context.h               | 32 +++++++++++++++---
 arch/x86/include/asm/mshyperv.h                  | 10 +++---
 arch/x86/include/asm/paravirt_types.h            | 14 ++++----
 arch/x86/include/asm/preempt.h                   | 15 +++------
 arch/x86/include/asm/processor.h                 |  6 ++--
 arch/x86/include/asm/rwsem.h                     |  4 +--
 arch/x86/include/asm/uaccess.h                   |  4 +--
 arch/x86/include/asm/xen/hypercall.h             |  5 ++-
 arch/x86/kernel/cpu/bugs.c                       |  8 -----
 arch/x86/kernel/cpu/common.c                     |  8 +++++
 arch/x86/kernel/smpboot.c                        | 13 +++----
 arch/x86/kvm/emulate.c                           |  3 +-
 arch/x86/kvm/vmx.c                               |  3 +-
 arch/x86/mm/fault.c                              |  3 +-
 arch/x86/mm/tlb.c                                | 11 +++---
 tools/objtool/Documentation/stack-validation.txt |  6 ++--
 tools/objtool/arch/x86/decode.c                  |  6 ++--
 tools/objtool/check.c                            | 43 ++++++++++++++++--------
 20 files changed, 122 insertions(+), 86 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..c096624137ae 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
 			   output, input...)				      \
 {									      \
-	register void *__sp asm(_ASM_SP);				      \
 	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
 		"call %P[new2]", feature2)				      \
-		: output, "+r" (__sp)					      \
+		: output, ASM_CALL_CONSTRAINT				      \
 		: [old] "i" (oldfunc), [new1] "i" (newfunc1),		      \
 		  [new2] "i" (newfunc2), ## input);			      \
 }
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 676ee5807d86..c1eadbaf1115 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -132,4 +132,15 @@
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction.  Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function.  If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned int __asm_call_sp asm("esp");
+#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+#endif
+
 #endif /* _ASM_X86_ASM_H */
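
A stand-alone illustration of the constraint mechanism (the callee and
the clobber list are hypothetical additions; the kernel's macro is just
the "+r" binding shown above, and it binds "esp" so that the same name
works on both 32-bit and 64-bit):

  #include <stdio.h>

  /* Listing a register variable bound to the stack pointer as an in/out
   * operand tells the compiler this asm uses the stack, so the frame is
   * fully set up before the call is emitted (64-bit form shown). */
  register unsigned long my_sp asm("rsp");

  static volatile int calls;

  void some_helper(void)		/* hypothetical callee */
  {
  	calls++;
  }

  static void do_checked_call(void)
  {
  	/* unlike the kernel macro's users, a raw call here must also
  	 * clobber the caller-saved registers the callee may use: */
  	asm volatile("call some_helper"
  		     : "+r" (my_sp)
  		     :
  		     : "rax", "rcx", "rdx", "rsi", "rdi",
  		       "r8", "r9", "r10", "r11", "memory", "cc");
  }

  int main(void)
  {
  	do_checked_call();
  	printf("calls = %d\n", calls);
  	return 0;
  }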
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..c120b5db178a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,32 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits.  This serves two purposes.  It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero.  It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		VM_WARN_ON_ONCE(asid > 4094);
+		return __sme_pa(mm->pgd) | (asid + 1);
+	} else {
+		VM_WARN_ON_ONCE(asid != 0);
+		return __sme_pa(mm->pgd);
+	}
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+	VM_WARN_ON_ONCE(asid > 4094);
+	return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+}
 
 /*
  * This can be used from process context to figure out what the value of
@@ -296,10 +322,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
-	if (static_cpu_has(X86_FEATURE_PCID))
-		cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());
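
The ASID-to-PCID encoding above can be seen in a stand-alone sketch
(hypothetical addresses; CR3_NOFLUSH is bit 63, and the 4094 limit comes
from the 12-bit PCID field minus the reserved value 0):

  #include <stdio.h>
  #include <stdint.h>

  #define CR3_NOFLUSH	(1ULL << 63)	/* "don't flush" hint with PCID */

  /* ASID n is stored as PCID n + 1, keeping PCID 0 for PCID-unaware
   * code that saves and restores CR3 without preserving the PCID. */
  static uint64_t build_cr3(uint64_t pgd_pa, uint16_t asid)
  {
  	return pgd_pa | (uint64_t)(asid + 1);
  }

  int main(void)
  {
  	uint64_t pgd_pa = 0x12345000ull;	/* hypothetical pgd */

  	/* ASID 0 -> PCID 1 */
  	printf("%#llx\n", (unsigned long long)build_cr3(pgd_pa, 0));
  	/* ASID 2 -> PCID 3, plus the no-flush hint */
  	printf("%#llx\n",
  	       (unsigned long long)(build_cr3(pgd_pa, 2) | CR3_NOFLUSH));
  	return 0;
  }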
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 63cc96f064dc..738503e1f80c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 input_address = input ? virt_to_phys(input) : 0;
 	u64 output_address = output ? virt_to_phys(output) : 0;
 	u64 hv_status;
-	register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
 	if (!hv_hypercall_pg)
@@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
 	__asm__ __volatile__("mov %4, %%r8\n"
 			     "call *%5"
-			     : "=a" (hv_status), "+r" (__sp),
+			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 			       "+c" (control), "+d" (input_address)
 			     :  "r" (output_address), "m" (hv_hypercall_pg)
 			     : "cc", "memory", "r8", "r9", "r10", "r11");
@@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
 	__asm__ __volatile__("call *%7"
 			     : "=A" (hv_status),
-			       "+c" (input_address_lo), "+r" (__sp)
+			       "+c" (input_address_lo), ASM_CALL_CONSTRAINT
 			     : "A" (control),
 			       "b" (input_address_hi),
 			       "D"(output_address_hi), "S"(output_address_lo),
@@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 {
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
-	register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
 	{
 		__asm__ __volatile__("call *%4"
-				     : "=a" (hv_status), "+r" (__sp),
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 				       "+c" (control), "+d" (input1)
 				     : "m" (hv_hypercall_pg)
 				     : "cc", "r8", "r9", "r10", "r11");
@@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 		__asm__ __volatile__ ("call *%5"
 				      : "=A"(hv_status),
 					"+c"(input1_lo),
-					"+r"(__sp)
+					ASM_CALL_CONSTRAINT
 				      :	"A" (control),
 					"b" (input1_hi),
 					"m" (hv_hypercall_pg)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 42873edd9f9d..280d94c36dad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -459,8 +459,8 @@ int paravirt_disable_iospace(void);
  */
 #ifdef CONFIG_X86_32
 #define PVOP_VCALL_ARGS							\
-	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;	\
-	register void *__sp asm("esp")
+	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
 #define PVOP_CALL_ARGS			PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"a" ((unsigned long)(x))
@@ -480,8 +480,8 @@ int paravirt_disable_iospace(void);
 /* [re]ax isn't an arg, but the return val */
 #define PVOP_VCALL_ARGS						\
 	unsigned long __edi = __edi, __esi = __esi,		\
-		__edx = __edx, __ecx = __ecx, __eax = __eax;	\
-	register void *__sp asm("rsp")
+		__edx = __edx, __ecx = __ecx, __eax = __eax;
+
 #define PVOP_CALL_ARGS		PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"D" ((unsigned long)(x))
@@ -532,7 +532,7 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : call_clbr, "+r" (__sp)		\
+				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
@@ -542,7 +542,7 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : call_clbr, "+r" (__sp)		\
+				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
@@ -569,7 +569,7 @@ int paravirt_disable_iospace(void);
 		asm volatile(pre					\
 			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
-			     : call_clbr, "+r" (__sp)			\
+			     : call_clbr, ASM_CALL_CONSTRAINT		\
 			     : paravirt_type(op),			\
 			       paravirt_clobber(clbr),			\
 			       ##__VA_ARGS__				\
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index ec1f3c651150..4f44505dbf87 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
 
 #ifdef CONFIG_PREEMPT
   extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule()					\
-({								\
-	register void *__sp asm(_ASM_SP);			\
-	asm volatile ("call ___preempt_schedule" : "+r"(__sp));	\
-})
+# define __preempt_schedule() \
+	asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
 
   extern asmlinkage void preempt_schedule(void);
   extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace()					\
-({									\
-	register void *__sp asm(_ASM_SP);				\
-	asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp));	\
-})
+# define __preempt_schedule_notrace() \
+	asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
   extern asmlinkage void preempt_schedule_notrace(void);
 #endif
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3fa26a61eabc..b390ff76e58f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -677,8 +677,6 @@ static inline void sync_core(void)
 	 * Like all of Linux's memory ordering operations, this is a
 	 * compiler barrier as well.
 	 */
-	register void *__sp asm(_ASM_SP);
-
 #ifdef CONFIG_X86_32
 	asm volatile (
 		"pushfl\n\t"
@@ -686,7 +684,7 @@ static inline void sync_core(void)
 		"pushl $1f\n\t"
 		"iret\n\t"
 		"1:"
-		: "+r" (__sp) : : "memory");
+		: ASM_CALL_CONSTRAINT : : "memory");
 #else
 	unsigned int tmp;
 
@@ -703,7 +701,7 @@ static inline void sync_core(void)
 		"iretq\n\t"
 		UNWIND_HINT_RESTORE
 		"1:"
-		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+		: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 #endif
 }
 
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index a34e0d4b957d..7116b7931c7b 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 ({							\
 	long tmp;					\
 	struct rw_semaphore* ret;			\
-	register void *__sp asm(_ASM_SP);		\
 							\
 	asm volatile("# beginning down_write\n\t"	\
 		     LOCK_PREFIX "  xadd      %1,(%4)\n\t"	\
@@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 		     "  call " slow_path "\n"		\
 		     "1:\n"				\
 		     "# ending down_write"		\
-		     : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+		     : "+m" (sem->count), "=d" (tmp),	\
+		       "=a" (ret), ASM_CALL_CONSTRAINT	\
 		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
 		     : "memory", "cc");			\
 	ret;						\
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 184eb9894dae..78e8fcc87d4c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 ({									\
 	int __ret_gu;							\
 	register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX);		\
-	register void *__sp asm(_ASM_SP);				\
 	__chk_user_ptr(ptr);						\
 	might_fault();							\
 	asm volatile("call __get_user_%P4"				\
-		     : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp)	\
+		     : "=a" (__ret_gu), "=r" (__val_gu),		\
+			ASM_CALL_CONSTRAINT				\
 		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
 	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
 	__builtin_expect(__ret_gu, 0);					\
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..128a1a0b1450 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
 	register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
 	register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
 	register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
-	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
-	register void *__sp asm(_ASM_SP);
+	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
 
-#define __HYPERCALL_0PARAM	"=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM	"=r" (__res), ASM_CALL_CONSTRAINT
 #define __HYPERCALL_1PARAM	__HYPERCALL_0PARAM, "+r" (__arg1)
 #define __HYPERCALL_2PARAM	__HYPERCALL_1PARAM, "+r" (__arg2)
 #define __HYPERCALL_3PARAM	__HYPERCALL_2PARAM, "+r" (__arg3)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index db684880d74a..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,14 +21,6 @@
 
 void __init check_bugs(void)
 {
-#ifdef CONFIG_X86_32
-	/*
-	 * Regardless of whether PCID is enumerated, the SDM says
-	 * that it can't be enabled in 32-bit mode.
-	 */
-	setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
 	identify_boot_cpu();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 775f10100d7f..c9176bae7fd8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -904,6 +904,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 	fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Regardless of whether PCID is enumerated, the SDM says
+	 * that it can't be enabled in 32-bit mode.
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
 }
 
 void __init early_cpu_init(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0854ff169274..ad59edd84de7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -232,12 +232,6 @@ static void notrace start_secondary(void *unused)
 	 */
 	if (boot_cpu_has(X86_FEATURE_PCID))
 		__write_cr4(__read_cr4() | X86_CR4_PCIDE);
-	cpu_init();
-	x86_cpuinit.early_percpu_clock_init();
-	preempt_disable();
-	smp_callin();
-
-	enable_start_cpu0 = 0;
 
 #ifdef CONFIG_X86_32
 	/* switch away from the initial page table */
@@ -245,6 +239,13 @@ static void notrace start_secondary(void *unused)
 	__flush_tlb_all();
 #endif
 
+	cpu_init();
+	x86_cpuinit.early_percpu_clock_init();
+	preempt_disable();
+	smp_callin();
+
+	enable_start_cpu0 = 0;
+
 	/* otherwise gcc will move up smp_processor_id before the cpu_init */
 	barrier();
 	/*
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..f23f13403f33 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5296,7 +5296,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
-	register void *__sp asm(_ASM_SP);
 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
 
 	if (!(ctxt->d & ByteOp))
@@ -5304,7 +5303,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 
 	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-	      [fastop]"+S"(fop), "+r"(__sp)
+	      [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
 	    : "c"(ctxt->src2.val));
 
 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06c0c6d0541e..6ee237f509dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9036,7 +9036,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 {
 	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	register void *__sp asm(_ASM_SP);
 
 	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
 			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9064,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
 			[sp]"=&r"(tmp),
 #endif
-			"+r"(__sp)
+			ASM_CALL_CONSTRAINT
 			:
 			[entry]"r"(entry),
 			[ss]"i"(__KERNEL_DS),
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b836a7274e12..39567b5c33da 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	if (is_vmalloc_addr((void *)address) &&
 	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
 	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
-		register void *__sp asm("rsp");
 		unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
 		/*
 		 * We're likely to be running with very little stack space
@@ -821,7 +820,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 		asm volatile ("movq %[stack], %%rsp\n\t"
 			      "call handle_stack_overflow\n\t"
 			      "1: jmp 1b"
-			      : "+r" (__sp)
+			      : ASM_CALL_CONSTRAINT
 			      : "D" ("kernel stack overflow (page fault)"),
 				"S" (regs), "d" (address),
 				[stack] "rm" (stack));
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 1ab3821f9e26..93fe97cce581 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -126,8 +126,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() !=
-			 (__sme_pa(real_prev->pgd) | prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -172,7 +171,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			 */
 			this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
 				       next_tlb_gen);
-			write_cr3(__sme_pa(next->pgd) | prev_asid);
+			write_cr3(build_cr3(next, prev_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 					TLB_FLUSH_ALL);
 		}
@@ -216,12 +215,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-			write_cr3(__sme_pa(next->pgd) | new_asid);
+			write_cr3(build_cr3(next, new_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 					TLB_FLUSH_ALL);
 		} else {
 			/* The new ASID is already up to date. */
-			write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+			write_cr3(build_cr3_noflush(next, new_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 		}
 
@@ -265,7 +264,7 @@ void initialize_tlbstate_and_flush(void)
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
 	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(cr3 & ~CR3_PCID_MASK);
+	write_cr3(build_cr3(mm, 0));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 6a1af43862df..3995735a878f 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them.
    If it's a GCC-compiled .c file, the error may be because the function
    uses an inline asm() statement which has a "call" instruction.  An
    asm() statement with a call instruction must declare the use of the
-   stack pointer in its output operand.  For example, on x86_64:
+   stack pointer in its output operand.  On x86_64, this means adding
+   the ASM_CALL_CONSTRAINT as an output constraint:
 
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
+     asm volatile("call func" : ASM_CALL_CONSTRAINT);
 
    Otherwise the stack frame may not get created before the call.
 
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0e8c8ec4fd4e..0f22768c0d4d 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -208,14 +208,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		break;
 
 	case 0x89:
-		if (rex == 0x48 && modrm == 0xe5) {
+		if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
 
-			/* mov %rsp, %rbp */
+			/* mov %rsp, reg */
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_REG;
 			op->src.reg = CFI_SP;
 			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_BP;
+			op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
 			break;
 		}
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f744617c9946..a0c518ecf085 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1203,24 +1203,39 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
 		switch (op->src.type) {
 
 		case OP_SRC_REG:
-			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) {
+			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+			    cfa->base == CFI_SP &&
+			    regs[CFI_BP].base == CFI_CFA &&
+			    regs[CFI_BP].offset == -cfa->offset) {
+
+				/* mov %rsp, %rbp */
+				cfa->base = op->dest.reg;
+				state->bp_scratch = false;
+			}
 
-				if (cfa->base == CFI_SP &&
-				    regs[CFI_BP].base == CFI_CFA &&
-				    regs[CFI_BP].offset == -cfa->offset) {
+			else if (op->src.reg == CFI_SP &&
+				 op->dest.reg == CFI_BP && state->drap) {
 
-					/* mov %rsp, %rbp */
-					cfa->base = op->dest.reg;
-					state->bp_scratch = false;
-				}
+				/* drap: mov %rsp, %rbp */
+				regs[CFI_BP].base = CFI_BP;
+				regs[CFI_BP].offset = -state->stack_size;
+				state->bp_scratch = false;
+			}
 
-				else if (state->drap) {
+			else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
 
-					/* drap: mov %rsp, %rbp */
-					regs[CFI_BP].base = CFI_BP;
-					regs[CFI_BP].offset = -state->stack_size;
-					state->bp_scratch = false;
-				}
+				/*
+				 * mov %rsp, %reg
+				 *
+				 * This is needed for the rare case where GCC
+				 * does:
+				 *
+				 *   mov    %rsp, %rax
+				 *   ...
+				 *   mov    %rax, %rsp
+				 */
+				state->vals[op->dest.reg].base = CFI_CFA;
+				state->vals[op->dest.reg].offset = -state->stack_size;
 			}
 
 			else if (op->dest.reg == cfa->base) {

* [GIT PULL] x86 fixes
@ 2017-09-13 17:54 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-09-13 17:54 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andy Lutomirski, Andrew Morton, Borislav Petkov

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 1278f58cdee63cfbb04e5624474a291c81a7a13b x86/hyper-v: Remove duplicated HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED definition

The main changes are the PCID fixes from Andy, but there are also two hyperv fixes 
and two paravirt updates.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  MAINTAINERS                        # 30c1bbffe629: paravirt: Switch maintainer

 Thanks,

	Ingo

------------------>
Andy Lutomirski (3):
      x86/mm: Get rid of VM_BUG_ON in switch_tlb_irqs_off()
      x86/hibernate/64: Mask off CR3's PCID bits in the saved CR3
      x86/mm/64: Initialize CR4.PCIDE early

Juergen Gross (2):
      x86/paravirt: Remove no longer used paravirt functions
      paravirt: Switch maintainer

K. Y. Srinivasan (1):
      x86/hyper-V: Allocate the IDT entry early in boot

Vitaly Kuznetsov (1):
      x86/hyper-v: Remove duplicated HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED definition


 MAINTAINERS                           |  4 +--
 arch/x86/include/asm/desc.h           |  3 +--
 arch/x86/include/asm/paravirt.h       | 37 --------------------------
 arch/x86/include/asm/paravirt_types.h |  9 -------
 arch/x86/include/asm/pgtable.h        | 27 +++----------------
 arch/x86/include/asm/special_insns.h  | 10 +++----
 arch/x86/include/uapi/asm/hyperv.h    |  6 -----
 arch/x86/kernel/cpu/common.c          | 49 +++++------------------------------
 arch/x86/kernel/cpu/mshyperv.c        |  4 +--
 arch/x86/kernel/paravirt.c            |  5 ----
 arch/x86/kernel/setup.c               |  5 +++-
 arch/x86/kernel/smpboot.c             |  8 +++---
 arch/x86/kvm/vmx.c                    |  2 +-
 arch/x86/mm/init.c                    | 34 ++++++++++++++++++++++++
 arch/x86/mm/pgtable.c                 |  7 +----
 arch/x86/mm/tlb.c                     | 22 +++++++++++++++-
 arch/x86/power/hibernate_64.c         | 21 ++++++++++++++-
 arch/x86/xen/enlighten_pv.c           |  2 --
 arch/x86/xen/mmu_pv.c                 |  2 --
 19 files changed, 107 insertions(+), 150 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f46a3225e398..14e76a41b302 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10129,7 +10129,7 @@ F:	include/uapi/linux/ppdev.h
 F:	Documentation/parport*.txt
 
 PARAVIRT_OPS INTERFACE
-M:	Jeremy Fitzhardinge <jeremy@goop.org>
+M:	Juergen Gross <jgross@suse.com>
 M:	Chris Wright <chrisw@sous-sol.org>
 M:	Alok Kataria <akataria@vmware.com>
 M:	Rusty Russell <rusty@rustcorp.com.au>
@@ -10137,7 +10137,7 @@ L:	virtualization@lists.linux-foundation.org
 S:	Supported
 F:	Documentation/virtual/paravirt_ops.txt
 F:	arch/*/kernel/paravirt*
-F:	arch/*/include/asm/paravirt.h
+F:	arch/*/include/asm/paravirt*.h
 F:	include/linux/hypervisor.h
 
 PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1a2ba368da39..9d0e13738ed3 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -121,7 +121,6 @@ static inline int desc_empty(const void *ptr)
 #define load_ldt(ldt)				asm volatile("lldt %0"::"m" (ldt))
 
 #define store_gdt(dtr)				native_store_gdt(dtr)
-#define store_idt(dtr)				native_store_idt(dtr)
 #define store_tr(tr)				(tr = native_store_tr())
 
 #define load_TLS(t, cpu)			native_load_tls(t, cpu)
@@ -228,7 +227,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
 	asm volatile("sgdt %0":"=m" (*dtr));
 }
 
-static inline void native_store_idt(struct desc_ptr *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
 {
 	asm volatile("sidt %0":"=m" (*dtr));
 }
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25dd22f7c70..12deec722cf0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long __read_cr4(void)
-{
-	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
-}
-
 static inline void __write_cr4(unsigned long x)
 {
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
 {
 	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 }
-static inline void store_idt(struct desc_ptr *dtr)
-{
-	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
-}
 static inline unsigned long paravirt_store_tr(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 	PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
 }
 
-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep)
-{
-	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
-}
-
 static inline pte_t __pte(pteval_t val)
 {
 	pteval_t ret;
@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
 }
 
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-			      pmd_t *pmdp, pmd_t pmd)
-{
-	if (sizeof(pmdval_t) > sizeof(long))
-		/* 5 arg words */
-		pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
-	else
-		PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
-			    native_pmd_val(pmd));
-}
-
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
-			      pud_t *pudp, pud_t pud)
-{
-	if (sizeof(pudval_t) > sizeof(long))
-		/* 5 arg words */
-		pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
-	else
-		PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
-			    native_pud_val(pud));
-}
-
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 	pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 6b64fc6367f2..42873edd9f9d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -107,7 +107,6 @@ struct pv_cpu_ops {
 	unsigned long (*read_cr0)(void);
 	void (*write_cr0)(unsigned long);
 
-	unsigned long (*read_cr4)(void);
 	void (*write_cr4)(unsigned long);
 
 #ifdef CONFIG_X86_64
@@ -119,8 +118,6 @@ struct pv_cpu_ops {
 	void (*load_tr_desc)(void);
 	void (*load_gdt)(const struct desc_ptr *);
 	void (*load_idt)(const struct desc_ptr *);
-	/* store_gdt has been removed. */
-	void (*store_idt)(struct desc_ptr *);
 	void (*set_ldt)(const void *desc, unsigned entries);
 	unsigned long (*store_tr)(void);
 	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -245,12 +242,6 @@ struct pv_mmu_ops {
 	void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, pte_t pteval);
 	void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
-	void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
-			   pmd_t *pmdp, pmd_t pmdval);
-	void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
-			   pud_t *pudp, pud_t pudval);
-	void (*pte_update)(struct mm_struct *mm, unsigned long addr,
-			   pte_t *ptep);
 
 	pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
 					pte_t *ptep);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5b4c44d419c5..b714934512b3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -55,8 +55,6 @@ extern pmdval_t early_pmd_flags;
 #else  /* !CONFIG_PARAVIRT */
 #define set_pte(ptep, pte)		native_set_pte(ptep, pte)
 #define set_pte_at(mm, addr, ptep, pte)	native_set_pte_at(mm, addr, ptep, pte)
-#define set_pmd_at(mm, addr, pmdp, pmd)	native_set_pmd_at(mm, addr, pmdp, pmd)
-#define set_pud_at(mm, addr, pudp, pud)	native_set_pud_at(mm, addr, pudp, pud)
 
 #define set_pte_atomic(ptep, pte)					\
 	native_set_pte_atomic(ptep, pte)
@@ -87,8 +85,6 @@ extern pmdval_t early_pmd_flags;
 #define pte_clear(mm, addr, ptep)	native_pte_clear(mm, addr, ptep)
 #define pmd_clear(pmd)			native_pmd_clear(pmd)
 
-#define pte_update(mm, addr, ptep)              do { } while (0)
-
 #define pgd_val(x)	native_pgd_val(x)
 #define __pgd(x)	native_make_pgd(x)
 
@@ -979,31 +975,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	native_set_pte(ptep, pte);
 }
 
-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
-				     pmd_t *pmdp , pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
 {
 	native_set_pmd(pmdp, pmd);
 }
 
-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
-				     pud_t *pudp, pud_t pud)
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+			      pud_t *pudp, pud_t pud)
 {
 	native_set_pud(pudp, pud);
 }
 
-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces.  It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.
- */
-#define pte_update(mm, addr, ptep)		do { } while (0)
-#endif
-
 /*
  * We only update the dirty/accessed state if we set
  * the dirty bit by hand in the kernel, since the hardware
@@ -1031,7 +1014,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 				       pte_t *ptep)
 {
 	pte_t pte = native_ptep_get_and_clear(ptep);
-	pte_update(mm, addr, ptep);
 	return pte;
 }
 
@@ -1058,7 +1040,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 				      unsigned long addr, pte_t *ptep)
 {
 	clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
-	pte_update(mm, addr, ptep);
 }
 
 #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 9efaabf5b54b..a24dfcf79f4a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)
 
 extern asmlinkage void native_load_gs_index(unsigned);
 
+static inline unsigned long __read_cr4(void)
+{
+	return native_read_cr4();
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
 	native_write_cr3(x);
 }
 
-static inline unsigned long __read_cr4(void)
-{
-	return native_read_cr4();
-}
-
 static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 7032f4d8dff3..f65d12504e80 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -153,12 +153,6 @@
 #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED	(1 << 11)
 
 /*
- * HV_VP_SET available
- */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED	(1 << 11)
-
-
-/*
  * Crash notification flag.
  */
 #define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fb1d3358a4af..775f10100d7f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
 __setup("nompx", x86_mpx_setup);
 
 #ifdef CONFIG_X86_64
-static int __init x86_pcid_setup(char *s)
+static int __init x86_nopcid_setup(char *s)
 {
-	/* require an exact match without trailing characters */
-	if (strlen(s))
-		return 0;
+	/* nopcid doesn't accept parameters */
+	if (s)
+		return -EINVAL;
 
 	/* do not emit a message if the feature is not present */
 	if (!boot_cpu_has(X86_FEATURE_PCID))
-		return 1;
+		return 0;
 
 	setup_clear_cpu_cap(X86_FEATURE_PCID);
 	pr_info("nopcid: PCID feature disabled\n");
-	return 1;
+	return 0;
 }
-__setup("nopcid", x86_pcid_setup);
+early_param("nopcid", x86_nopcid_setup);
 #endif
 
 static int __init x86_noinvpcid_setup(char *s)
@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	}
 }
 
-static void setup_pcid(struct cpuinfo_x86 *c)
-{
-	if (cpu_has(c, X86_FEATURE_PCID)) {
-		if (cpu_has(c, X86_FEATURE_PGE)) {
-			/*
-			 * We'd like to use cr4_set_bits_and_update_boot(),
-			 * but we can't.  CR4.PCIDE is special and can only
-			 * be set in long mode, and the early CPU init code
-			 * doesn't know this and would try to restore CR4.PCIDE
-			 * prior to entering long mode.
-			 *
-			 * Instead, we rely on the fact that hotplug, resume,
-			 * etc all fully restore CR4 before they write anything
-			 * that could have nonzero PCID bits to CR3.  CR4.PCIDE
-			 * has no effect on the page tables themselves, so we
-			 * don't need it to be restored early.
-			 */
-			cr4_set_bits(X86_CR4_PCIDE);
-		} else {
-			/*
-			 * flush_tlb_all(), as currently implemented, won't
-			 * work if PCID is on but PGE is not.  Since that
-			 * combination doesn't exist on real hardware, there's
-			 * no reason to try to fully support it, but it's
-			 * polite to avoid corrupting data if we're on
-			 * an improperly configured VM.
-			 */
-			clear_cpu_cap(c, X86_FEATURE_PCID);
-		}
-	}
-}
-
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	setup_smep(c);
 	setup_smap(c);
 
-	/* Set up PCID */
-	setup_pcid(c);
-
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 3b3f713e15e5..236324e83a3a 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -59,8 +59,6 @@ void hyperv_vector_handler(struct pt_regs *regs)
 void hv_setup_vmbus_irq(void (*handler)(void))
 {
 	vmbus_handler = handler;
-	/* Setup the IDT for hypervisor callback */
-	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 }
 
 void hv_remove_vmbus_irq(void)
@@ -251,6 +249,8 @@ static void __init ms_hyperv_init_platform(void)
 	 */
 	x86_platform.apic_post_init = hyperv_init;
 	hyperv_setup_mmu_ops();
+	/* Setup the IDT for hypervisor callback */
+	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 #endif
 }
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a14df9eecfed..19a3e8f961c7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.set_debugreg = native_set_debugreg,
 	.read_cr0 = native_read_cr0,
 	.write_cr0 = native_write_cr0,
-	.read_cr4 = native_read_cr4,
 	.write_cr4 = native_write_cr4,
 #ifdef CONFIG_X86_64
 	.read_cr8 = native_read_cr8,
@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.set_ldt = native_set_ldt,
 	.load_gdt = native_load_gdt,
 	.load_idt = native_load_idt,
-	.store_idt = native_store_idt,
 	.store_tr = native_store_tr,
 	.load_tls = native_load_tls,
 #ifdef CONFIG_X86_64
@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 	.set_pte = native_set_pte,
 	.set_pte_at = native_set_pte_at,
 	.set_pmd = native_set_pmd,
-	.set_pmd_at = native_set_pmd_at,
-	.pte_update = paravirt_nop,
 
 	.ptep_modify_prot_start = __ptep_modify_prot_start,
 	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 	.pmd_clear = native_pmd_clear,
 #endif
 	.set_pud = native_set_pud,
-	.set_pud_at = native_set_pud_at,
 
 	.pmd_val = PTE_IDENT,
 	.make_pmd = PTE_IDENT,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d84afb0a322d..0957dd73d127 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1178,8 +1178,11 @@ void __init setup_arch(char **cmdline_p)
 	 * with the current CR4 value.  This may not be necessary, but
 	 * auditing all the early-boot CR4 manipulation would be needed to
 	 * rule it out.
+	 *
+	 * Mask off features that don't work outside long mode (just
+	 * PCIDE for now).
 	 */
-	mmu_cr4_features = __read_cr4();
+	mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
 
 	memblock_set_current_limit(get_max_mapped());
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index cd6622c3204e..0854ff169274 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -226,10 +226,12 @@ static int enable_start_cpu0;
 static void notrace start_secondary(void *unused)
 {
 	/*
-	 * Don't put *anything* before cpu_init(), SMP booting is too
-	 * fragile that we want to limit the things done here to the
-	 * most necessary things.
+	 * Don't put *anything* except direct CPU state initialization
+	 * before cpu_init(), SMP booting is too fragile that we want to
+	 * limit the things done here to the most necessary things.
 	 */
+	if (boot_cpu_has(X86_FEATURE_PCID))
+		__write_cr4(__read_cr4() | X86_CR4_PCIDE);
 	cpu_init();
 	x86_cpuinit.early_percpu_clock_init();
 	preempt_disable();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4253adef9044..699704d4bc9e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5192,7 +5192,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-	native_store_idt(&dt);
+	store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
 	vmx->host_idt_base = dt.address;
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7777ccc0e9f9..af5c1ed21d43 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -19,6 +19,7 @@
 #include <asm/microcode.h>
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
+#include <asm/cpufeature.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
 	}
 }
 
+static void setup_pcid(void)
+{
+#ifdef CONFIG_X86_64
+	if (boot_cpu_has(X86_FEATURE_PCID)) {
+		if (boot_cpu_has(X86_FEATURE_PGE)) {
+			/*
+			 * This can't be cr4_set_bits_and_update_boot() --
+			 * the trampoline code can't handle CR4.PCIDE and
+			 * it wouldn't do any good anyway.  Despite the name,
+			 * cr4_set_bits_and_update_boot() doesn't actually
+			 * cause the bits in question to remain set all the
+			 * way through the secondary boot asm.
+			 *
+			 * Instead, we brute-force it and set CR4.PCIDE
+			 * manually in start_secondary().
+			 */
+			cr4_set_bits(X86_CR4_PCIDE);
+		} else {
+			/*
+			 * flush_tlb_all(), as currently implemented, won't
+			 * work if PCID is on but PGE is not.  Since that
+			 * combination doesn't exist on real hardware, there's
+			 * no reason to try to fully support it, but it's
+			 * polite to avoid corrupting data if we're on
+			 * an improperly configured VM.
+			 */
+			setup_clear_cpu_cap(X86_FEATURE_PCID);
+		}
+	}
+#endif
+}
+
 #ifdef CONFIG_X86_32
 #define NR_RANGE_MR 3
 #else /* CONFIG_X86_64 */
@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
 	unsigned long end;
 
 	probe_page_size_mask();
+	setup_pcid();
 
 #ifdef CONFIG_X86_64
 	end = max_pfn << PAGE_SHIFT;
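
(Background on PCID, since most of the changes in this batch revolve
around it: a Process Context ID tags TLB entries with an address-space
identifier, so writing CR3 no longer has to flush the whole TLB.
CR4.PCIDE can only be set in long mode, which is why the bit has to be
kept out of the early trampoline paths, is set late on the boot CPU
here, and is then set by hand in start_secondary() for the APs.)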
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 218834a3e9ad..b372f3442bbf 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 {
 	int changed = !pte_same(*ptep, entry);
 
-	if (changed && dirty) {
+	if (changed && dirty)
 		*ptep = entry;
-		pte_update(vma->vm_mm, address, ptep);
-	}
 
 	return changed;
 }
@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
 					 (unsigned long *) &ptep->pte);
 
-	if (ret)
-		pte_update(vma->vm_mm, addr, ptep);
-
 	return ret;
 }
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 37689a7cc03b..1ab3821f9e26 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -121,8 +121,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * hypothetical buggy code that directly switches to swapper_pg_dir
 	 * without going through leave_mm() / switch_mm_irqs_off() or that
 	 * does something like write_cr3(read_cr3_pa()).
+	 *
+	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
+	 * isn't free.
 	 */
-	VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+	if (WARN_ON_ONCE(__read_cr3() !=
+			 (__sme_pa(real_prev->pgd) | prev_asid))) {
+		/*
+		 * If we were to BUG here, we'd be very likely to kill
+		 * the system so hard that we don't see the call trace.
+		 * Try to recover instead by ignoring the error and doing
+		 * a global flush to minimize the chance of corruption.
+		 *
+		 * (This is far from being a fully correct recovery.
+		 *  Architecturally, the CPU could prefetch something
+		 *  back into an incorrect ASID slot and leave it there
+		 *  to cause trouble down the road.  It's better than
+		 *  nothing, though.)
+		 */
+		__flush_tlb_all();
+	}
+#endif
 
 	if (real_prev == next) {
 		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f2598d81cd55..f910c514438f 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -295,7 +295,26 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 		return -EOVERFLOW;
 	rdr->jump_address = (unsigned long)restore_registers;
 	rdr->jump_address_phys = __pa_symbol(restore_registers);
-	rdr->cr3 = restore_cr3;
+
+	/*
+	 * The restore code fixes up CR3 and CR4 in the following sequence:
+	 *
+	 * [in hibernation asm]
+	 * 1. CR3 <= temporary page tables
+	 * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
+	 * 3. CR3 <= rdr->cr3
+	 * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
+	 * [in restore_processor_state()]
+	 * 5. CR4 <= saved CR4
+	 * 6. CR3 <= saved CR3
+	 *
+	 * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
+	 * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
+	 * rdr->cr3 needs to point to valid page tables but must not
+	 * have any of the PCID bits set.
+	 */
+	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+
 	rdr->magic = RESTORE_MAGIC;
 
 	hibernation_e820_save(rdr->e820_digest);
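
(Concretely: with CR4.PCIDE=1 the low 12 bits of CR3 hold the PCID, and
setting CR4.PCIDE to 1 while CR3[11:0] is nonzero raises #GP -- which is
why rdr->cr3 has those bits masked off with CR3_PCID_MASK above.)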
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae2a2e2d6362..69b9deff7e5c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.read_cr0 = xen_read_cr0,
 	.write_cr0 = xen_write_cr0,
 
-	.read_cr4 = native_read_cr4,
 	.write_cr4 = xen_write_cr4,
 
 #ifdef CONFIG_X86_64
@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.alloc_ldt = xen_alloc_ldt,
 	.free_ldt = xen_free_ldt,
 
-	.store_idt = native_store_idt,
 	.store_tr = xen_store_tr,
 
 	.write_ldt_entry = xen_write_ldt_entry,
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 6b983b300666..509f560bd0c6 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2409,8 +2409,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.flush_tlb_single = xen_flush_tlb_single,
 	.flush_tlb_others = xen_flush_tlb_others,
 
-	.pte_update = paravirt_nop,
-
 	.pgd_alloc = xen_pgd_alloc,
 	.pgd_free = xen_pgd_free,
 

* [GIT PULL] x86 fixes
@ 2017-09-12 15:38 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-09-12 15:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: e2329b4252f373c244e75928be38bf1dd45b35da x86/cpu: Remove unused and undefined __generic_processor_info() declaration

Two fixes: a dead code removal, plus an SME memory encryption fix for a bug in
32-bit kernels that crashed Xen guests.
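
Some background on the SME fix, as a hedged sketch: the old macros cast
their argument to 'unsigned long', which on 32-bit kernels is only 32
bits wide -- so a 64-bit PAE page table entry run through
__sme_set()/__sme_clr() lost its high physical-address bits, and the
mask itself could not represent a high encryption bit either. A minimal
user-space illustration of the truncation; the PTE value is made up:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* A PAE page table entry: physical bits above bit 31 matter. */
		uint64_t pte = 0x100123456067ULL;

		/* What the old macros' (unsigned long) cast did on 32-bit: */
		uint32_t truncated = (uint32_t)pte;

		printf("pte:       %#llx\n", (unsigned long long)pte);
		printf("truncated: %#x\n", truncated);	/* high bits gone */
		return 0;
	}

Making the mask a u64 and dropping the cast keeps the full entry intact,
which is what un-breaks the 32-bit Xen guests.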

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/mem_encrypt.h        # 21d9bb4a05ba: x86/mm: Make the SME mask a 

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/mm: Make the SME mask a u64

Dou Liyang (1):
      x86/cpu: Remove unused and undefined __generic_processor_info() declaration


 arch/x86/include/asm/mem_encrypt.h |  4 ++--
 arch/x86/include/asm/mpspec.h      |  1 -
 arch/x86/kernel/apic/apic.c        |  2 +-
 arch/x86/mm/mem_encrypt.c          |  2 +-
 include/linux/mem_encrypt.h        | 13 +++++++++----
 5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 8e618fcf1f7c..6a77c63540f7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -21,7 +21,7 @@
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
-extern unsigned long sme_me_mask;
+extern u64 sme_me_mask;
 
 void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
 			 unsigned long decrypted_kernel_vaddr,
@@ -49,7 +49,7 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
 
 #else	/* !CONFIG_AMD_MEM_ENCRYPT */
 
-#define sme_me_mask	0UL
+#define sme_me_mask	0ULL
 
 static inline void __init sme_early_encrypt(resource_size_t paddr,
 					    unsigned long size) { }
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 831eb7895535..c471ca1f9412 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -86,7 +86,6 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { }
 #endif
 
 int generic_processor_info(int apicid, int version);
-int __generic_processor_info(int apicid, int version, bool enabled);
 
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_LOCAL_APIC)
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7834f73efbf1..6e19ef152869 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2130,7 +2130,7 @@ int generic_processor_info(int apicid, int version)
 	 * Since fixing handling of boot_cpu_physical_apicid requires
 	 * another discussion and tests on each platform, we leave it
 	 * for now and here we use read_apic_id() directly in this
-	 * function, __generic_processor_info().
+	 * function, generic_processor_info().
 	 */
 	if (disabled_cpu_apicid != BAD_APICID &&
 	    disabled_cpu_apicid != read_apic_id() &&
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 0fbd09269757..3fcc8e01683b 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -37,7 +37,7 @@ static char sme_cmdline_off[] __initdata = "off";
  * reside in the .data section so as not to be zeroed out when the .bss
  * section is later cleared.
  */
-unsigned long sme_me_mask __section(.data) = 0;
+u64 sme_me_mask __section(.data) = 0;
 EXPORT_SYMBOL_GPL(sme_me_mask);
 
 /* Buffer used for early in-place encryption by BSP, no locking needed */
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 1255f09f5e42..265a9cd21cb4 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -21,7 +21,7 @@
 
 #else	/* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
-#define sme_me_mask	0UL
+#define sme_me_mask	0ULL
 
 #endif	/* CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
@@ -30,18 +30,23 @@ static inline bool sme_active(void)
 	return !!sme_me_mask;
 }
 
-static inline unsigned long sme_get_me_mask(void)
+static inline u64 sme_get_me_mask(void)
 {
 	return sme_me_mask;
 }
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
 /*
  * The __sme_set() and __sme_clr() macros are useful for adding or removing
  * the encryption mask from a value (e.g. when dealing with pagetable
  * entries).
  */
-#define __sme_set(x)		((unsigned long)(x) | sme_me_mask)
-#define __sme_clr(x)		((unsigned long)(x) & ~sme_me_mask)
+#define __sme_set(x)		((x) | sme_me_mask)
+#define __sme_clr(x)		((x) & ~sme_me_mask)
+#else
+#define __sme_set(x)		(x)
+#define __sme_clr(x)		(x)
+#endif
 
 #endif	/* __ASSEMBLY__ */
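
A hedged usage sketch of the two macros -- 'paddr' and 'prot' are
placeholders, not identifiers from this patch:

	u64 pte  = __sme_set(paddr | prot);	/* tag the mapping encrypted */
	u64 phys = __sme_clr(pte);		/* strip the encryption bit again */

With the cast gone, the macros are type-transparent and 64-bit PAE
values pass through intact even on 32-bit kernels.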
 

* [GIT PULL] x86 fixes
@ 2017-08-26  7:26 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-08-26  7:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ccd5b3235180eef3cfec337df1c8554ab151b5cc x86/mm: Fix use-after-free of ldt_struct

Two fixes: an ldt_struct use-after-free fix, plus a cherry-picked objtool
instruction-decoding fix.
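
The use-after-free mechanism, roughly: a new mm starts life as a byte
copy of its parent, so its context.ldt initially aliases the parent's
ldt_struct, and init_new_context_ldt() is what replaces that alias with
a private copy. With the return value ignored, an allocation failure
left the alias in place and the same ldt_struct could later be freed
twice. A user-space analogy of the pattern, with made-up names:

	#include <stdlib.h>
	#include <string.h>

	struct mm { void *ldt; };

	static int dup_ldt(struct mm *child)
	{
		child->ldt = malloc(64);		/* private copy */
		return child->ldt ? 0 : -1;
	}

	static int dup_mm(struct mm *child, const struct mm *parent)
	{
		memcpy(child, parent, sizeof(*child));	/* ldt now aliased! */
		return dup_ldt(child);			/* the fix: propagate failure
							 * so nobody frees the
							 * aliased ldt */
	}

	int main(void)
	{
		struct mm parent = { .ldt = malloc(64) }, child;

		if (dup_mm(&child, &parent) == 0)
			free(child.ldt);
		free(parent.ldt);
		return 0;
	}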

 Thanks,

	Ingo

------------------>
Eric Biggers (1):
      x86/mm: Fix use-after-free of ldt_struct

Josh Poimboeuf (1):
      objtool: Fix '-mtune=atom' decoding support in objtool 2.0


 arch/x86/include/asm/mmu_context.h |  4 +---
 tools/objtool/arch/x86/decode.c    | 26 +++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 265c907d7d4c..7a234be7e298 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -140,9 +140,7 @@ static inline int init_new_context(struct task_struct *tsk,
 		mm->context.execute_only_pkey = -1;
 	}
 	#endif
-	init_new_context_ldt(tsk, mm);
-
-	return 0;
+	return init_new_context_ldt(tsk, mm);
 }
 static inline void destroy_context(struct mm_struct *mm)
 {
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index a36c2eba64e7..4559a21a8de2 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -271,7 +271,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 	case 0x8d:
 		if (rex == 0x48 && modrm == 0x65) {
 
-			/* lea -disp(%rbp), %rsp */
+			/* lea disp(%rbp), %rsp */
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_ADD;
 			op->src.reg = CFI_BP;
@@ -281,6 +281,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 			break;
 		}
 
+		if (rex == 0x48 && (modrm == 0xa4 || modrm == 0x64) &&
+		    sib == 0x24) {
+
+			/* lea disp(%rsp), %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (rex == 0x48 && modrm == 0x2c && sib == 0x24) {
+
+			/* lea (%rsp), %rbp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = CFI_SP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_BP;
+			break;
+		}
+
 		if (rex == 0x4c && modrm == 0x54 && sib == 0x24 &&
 		    insn.displacement.value == 8) {
 

* [GIT PULL] x86 fixes
@ 2017-07-21 10:26 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-07-21 10:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: db15e7f27369b81b6605a546d54eb844f87370a5 x86/devicetree: Convert to using %pOF instead of ->full_name

Half of the fixes are for various build-time warnings triggered by randconfig 
builds. Most (but not all...) were harmless. There's also:

 - ACPI boundary condition fixes,
 - UV platform fixes,
 - defconfig updates,
 - an AMD K6 CPU init fix,
 - a %pOF printk format related preparatory change,
 - and a warning fix related to the tlb/PCID changes.

 Thanks,

	Ingo

------------------>
Andrew Banman (1):
      x86/platform/uv/BAU: Disable BAU on single hub configurations

Arnd Bergmann (7):
      perf/x86: Shut up false-positive -Wmaybe-uninitialized warning
      x86/fpu/math-emu: Fix possible uninitialized variable use
      x86/fpu/math-emu: Avoid bogus -Wint-in-bool-context warning
      x86/io: Add "memory" clobber to insb/insw/insl/outsb/outsw/outsl
      x86/build: Silence the build with "make -s"
      x86/platform: Add PCI dependency for PUNIT_ATOM_DEBUG
      x86/platform/intel-mid: Fix a format string overflow warning

Justin Ernst (1):
      x86/platform/uv/BAU: Fix congested_response_us not taking effect

Krzysztof Kozlowski (1):
      x86/defconfig: Remove stale, old Kconfig options

Mikulas Patocka (1):
      x86/cpu: Use indirect call to measure performance in init_amd_k6()

Rob Herring (1):
      x86/devicetree: Convert to using %pOF instead of ->full_name

Roman Kagan (1):
      x86/mm, KVM: Fix warning when !CONFIG_PREEMPT_COUNT

Seunghun Han (2):
      x86/acpi: Prevent out of bound access caused by broken ACPI tables
      x86/ioapic: Pass the correct data to unmask_ioapic_irq()


 arch/x86/Kconfig.debug                             |  1 +
 arch/x86/boot/Makefile                             |  5 ++--
 arch/x86/configs/i386_defconfig                    |  3 ---
 arch/x86/configs/x86_64_defconfig                  |  3 ---
 arch/x86/events/core.c                             |  4 +--
 arch/x86/include/asm/io.h                          |  4 +--
 arch/x86/include/asm/mmu_context.h                 |  2 +-
 arch/x86/kernel/acpi/boot.c                        |  8 ++++++
 arch/x86/kernel/apic/io_apic.c                     |  2 +-
 arch/x86/kernel/cpu/amd.c                          |  1 +
 arch/x86/kernel/devicetree.c                       |  3 +--
 arch/x86/math-emu/Makefile                         |  4 +--
 arch/x86/math-emu/fpu_emu.h                        |  2 +-
 arch/x86/math-emu/reg_compare.c                    | 16 ++++++------
 .../intel-mid/device_libs/platform_max7315.c       |  6 +++--
 arch/x86/platform/uv/tlb_uv.c                      | 29 +++++++++++++++-------
 16 files changed, 55 insertions(+), 38 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fcb7604172ce..cd20ca0b4043 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -348,6 +348,7 @@ config X86_DEBUG_FPU
 
 config PUNIT_ATOM_DEBUG
 	tristate "ATOM Punit debug driver"
+	depends on PCI
 	select DEBUG_FS
 	select IOSF_MBI
 	---help---
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 0d810fb15eac..d88a2fddba8c 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -73,12 +73,13 @@ UBSAN_SANITIZE := n
 $(obj)/bzImage: asflags-y  := $(SVGA_MODE)
 
 quiet_cmd_image = BUILD   $@
+silent_redirect_image = >/dev/null
 cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
-			       $(obj)/zoffset.h $@
+			       $(obj)/zoffset.h $@ $($(quiet)redirect_image)
 
 $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
-	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+	@$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
 OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
 $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
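
(How the silencing works: under 'make -s' kbuild sets $(quiet) to
'silent_', so '$($(quiet)redirect_image)' expands to
'$(silent_redirect_image)', i.e. '>/dev/null'. In normal and V=1 builds
the inner expansion names an undefined variable and adds nothing, so the
tool's output stays visible.)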
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 6cf79e1a6830..0eb9f92f3717 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_64BIT is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -125,7 +124,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=y
 CONFIG_NF_NAT=y
 CONFIG_IP_NF_TARGET_MASQUERADE=y
 CONFIG_IP_NF_MANGLE=y
@@ -255,7 +253,6 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
 CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
 CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index de45f57b410d..4a4b16e56d35 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -124,7 +123,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=y
 CONFIG_NF_NAT=y
 CONFIG_IP_NF_TARGET_MASQUERADE=y
 CONFIG_IP_NF_MANGLE=y
@@ -251,7 +249,6 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
 CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
 CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index ff1ea2fb9705..8e3db8f642a7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -191,8 +191,8 @@ static void release_pmc_hardware(void) {}
 
 static bool check_hw_exists(void)
 {
-	u64 val, val_fail, val_new= ~0;
-	int i, reg, reg_fail, ret = 0;
+	u64 val, val_fail = -1, val_new= ~0;
+	int i, reg, reg_fail = -1, ret = 0;
 	int bios_fail = 0;
 	int reg_safe = -1;
 
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 7afb0e2f07f4..48febf07e828 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -328,13 +328,13 @@ static inline unsigned type in##bwl##_p(int port)			\
 static inline void outs##bwl(int port, const void *addr, unsigned long count) \
 {									\
 	asm volatile("rep; outs" #bwl					\
-		     : "+S"(addr), "+c"(count) : "d"(port));		\
+		     : "+S"(addr), "+c"(count) : "d"(port) : "memory");	\
 }									\
 									\
 static inline void ins##bwl(int port, void *addr, unsigned long count)	\
 {									\
 	asm volatile("rep; ins" #bwl					\
-		     : "+D"(addr), "+c"(count) : "d"(port));		\
+		     : "+D"(addr), "+c"(count) : "d"(port) : "memory");	\
 }
 
 BUILDIO(b, b, char)
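
A note on why the "memory" clobber matters here: the rep-string
instructions read or write the buffer behind 'addr', but that buffer
never appears as a memory operand in the constraint list, so without the
clobber the compiler may keep cached values live across the asm or
reorder surrounding accesses. A minimal runnable x86/GCC illustration of
the same hazard -- not kernel code:

	#include <stdio.h>

	static void asm_store(unsigned char *p)
	{
		/* Writes *p behind the compiler's back; only the "memory"
		 * clobber tells the optimizer that memory has changed. */
		asm volatile("movb $0x42, (%0)" : : "r" (p) : "memory");
	}

	int main(void)
	{
		unsigned char buf[1] = { 0 };

		asm_store(buf);
		/* Without the clobber above, this load could legally be
		 * folded to the constant 0 from the initializer. */
		printf("%d\n", buf[0]);
		return 0;
	}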
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index ecfcb6643c9b..265c907d7d4c 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -293,7 +293,7 @@ static inline unsigned long __get_current_cr3_fast(void)
 	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
 
 	/* For now, be very restrictive about when this can be called. */
-	VM_WARN_ON(in_nmi() || !in_atomic());
+	VM_WARN_ON(in_nmi() || preemptible());
 
 	VM_BUG_ON(cr3 != __read_cr3());
 	return cr3;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6bb680671088..7491e73d9253 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -347,6 +347,14 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 	struct mpc_intsrc mp_irq;
 
 	/*
+	 * Check bus_irq boundary.
+	 */
+	if (bus_irq >= NR_IRQS_LEGACY) {
+		pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq);
+		return;
+	}
+
+	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
 	 */
 	ioapic = mp_find_ioapic(gsi);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b4f5f73febdb..237e9c2341c7 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2093,7 +2093,7 @@ static inline void __init check_timer(void)
 			int idx;
 			idx = find_irq_entry(apic1, pin1, mp_INT);
 			if (idx != -1 && irq_trigger(idx))
-				unmask_ioapic_irq(irq_get_chip_data(0));
+				unmask_ioapic_irq(irq_get_irq_data(0));
 		}
 		irq_domain_deactivate_irq(irq_data);
 		irq_domain_activate_irq(irq_data);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb5abe8f5fd4..3b9e220621f8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -134,6 +134,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
 
 		n = K6_BUG_LOOP;
 		f_vide = vide;
+		OPTIMIZER_HIDE_VAR(f_vide);
 		d = rdtsc();
 		while (n--)
 			f_vide();
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3fe45f84ced4..cbf1f6ba39a8 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -235,8 +235,7 @@ static void __init dtb_add_ioapic(struct device_node *dn)
 
 	ret = of_address_to_resource(dn, 0, &r);
 	if (ret) {
-		printk(KERN_ERR "Can't obtain address from node %s.\n",
-				dn->full_name);
+		printk(KERN_ERR "Can't obtain address from device node %pOF.\n", dn);
 		return;
 	}
 	mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);
diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile
index 9b0c63b60302..1b2dac174321 100644
--- a/arch/x86/math-emu/Makefile
+++ b/arch/x86/math-emu/Makefile
@@ -5,8 +5,8 @@
 #DEBUG	= -DDEBUGGING
 DEBUG	=
 PARANOID = -DPARANOID
-EXTRA_CFLAGS	:= $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
-EXTRA_AFLAGS	:= $(PARANOID)
+ccflags-y += $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
+asflags-y += $(PARANOID)
 
 # From 'C' language sources:
 C_OBJS =fpu_entry.o errors.o \
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index afbc4d805d66..c9c320dccca1 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -157,7 +157,7 @@ extern u_char const data_sizes_16[32];
 
 #define signbyte(a) (((u_char *)(a))[9])
 #define getsign(a) (signbyte(a) & 0x80)
-#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
+#define setsign(a,b) { if ((b) != 0) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
 #define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
                         else signbyte(b) &= 0x7f; }
 #define changesign(a) { signbyte(a) ^= 0x80; }
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c
index b77360fdbf4a..19b33b50adfa 100644
--- a/arch/x86/math-emu/reg_compare.c
+++ b/arch/x86/math-emu/reg_compare.c
@@ -168,7 +168,7 @@ static int compare(FPU_REG const *b, int tagb)
 /* This function requires that st(0) is not empty */
 int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 {
-	int f = 0, c;
+	int f, c;
 
 	c = compare(loaded_data, loaded_tag);
 
@@ -189,12 +189,12 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 		case COMP_No_Comp:
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#ifdef PARANOID
 		default:
+#ifdef PARANOID
 			EXCEPTION(EX_INTERNAL | 0x121);
+#endif /* PARANOID */
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#endif /* PARANOID */
 		}
 	setcc(f);
 	if (c & COMP_Denormal) {
@@ -205,7 +205,7 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 
 static int compare_st_st(int nr)
 {
-	int f = 0, c;
+	int f, c;
 	FPU_REG *st_ptr;
 
 	if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
@@ -235,12 +235,12 @@ static int compare_st_st(int nr)
 		case COMP_No_Comp:
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#ifdef PARANOID
 		default:
+#ifdef PARANOID
 			EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#endif /* PARANOID */
 		}
 	setcc(f);
 	if (c & COMP_Denormal) {
@@ -283,12 +283,12 @@ static int compare_i_st_st(int nr)
 	case COMP_No_Comp:
 		f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
 		break;
-#ifdef PARANOID
 	default:
+#ifdef PARANOID
 		EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
 		f = 0;
 		break;
-#endif /* PARANOID */
 	}
 	FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
 	if (c & COMP_Denormal) {
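
(The restructuring above is what fixes the possible uninitialized use:
moving 'default:' outside the #ifdef guarantees that 'f' is assigned a
meaningful value on every switch path even in non-PARANOID builds, which
is what makes the old catch-all 'int f = 0' initializers safe to drop.)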
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
index 6e075afa7877..58337b2bc682 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
@@ -38,8 +38,10 @@ static void __init *max7315_platform_data(void *info)
 	 */
 	strcpy(i2c_info->type, "max7315");
 	if (nr++) {
-		sprintf(base_pin_name, "max7315_%d_base", nr);
-		sprintf(intr_pin_name, "max7315_%d_int", nr);
+		snprintf(base_pin_name, sizeof(base_pin_name),
+			 "max7315_%d_base", nr);
+		snprintf(intr_pin_name, sizeof(intr_pin_name),
+			 "max7315_%d_int", nr);
 	} else {
 		strcpy(base_pin_name, "max7315_base");
 		strcpy(intr_pin_name, "max7315_int");
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index d4a61ddf9e62..3e4bdb442fbc 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -40,7 +40,6 @@ static int timeout_base_ns[] = {
 static int timeout_us;
 static bool nobau = true;
 static int nobau_perm;
-static cycles_t congested_cycles;
 
 /* tunables: */
 static int max_concurr		= MAX_BAU_CONCURRENT;
@@ -829,10 +828,10 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
 		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
 			bcp->period_requests++;
 			bcp->period_time += elapsed;
-			if ((elapsed > congested_cycles) &&
+			if ((elapsed > usec_2_cycles(bcp->cong_response_us)) &&
 			    (bcp->period_requests > bcp->cong_reps) &&
 			    ((bcp->period_time / bcp->period_requests) >
-							congested_cycles)) {
+					usec_2_cycles(bcp->cong_response_us))) {
 				stat->s_congested++;
 				disable_for_period(bcp, stat);
 			}
@@ -2222,14 +2221,17 @@ static int __init uv_bau_init(void)
 	else if (is_uv1_hub())
 		ops = uv1_bau_ops;
 
+	nuvhubs = uv_num_possible_blades();
+	if (nuvhubs < 2) {
+		pr_crit("UV: BAU disabled - insufficient hub count\n");
+		goto err_bau_disable;
+	}
+
 	for_each_possible_cpu(cur_cpu) {
 		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
 	}
 
-	nuvhubs = uv_num_possible_blades();
-	congested_cycles = usec_2_cycles(congested_respns_us);
-
 	uv_base_pnode = 0x7fffffff;
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		cpus = uv_blade_nr_possible_cpus(uvhub);
@@ -2242,9 +2244,8 @@ static int __init uv_bau_init(void)
 		enable_timeouts();
 
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
-		set_bau_off();
-		nobau_perm = 1;
-		return 0;
+		pr_crit("UV: BAU disabled - per CPU init failed\n");
+		goto err_bau_disable;
 	}
 
 	vector = UV_BAU_MESSAGE;
@@ -2270,6 +2271,16 @@ static int __init uv_bau_init(void)
 	}
 
 	return 0;
+
+err_bau_disable:
+
+	for_each_possible_cpu(cur_cpu)
+		free_cpumask_var(per_cpu(uv_flush_tlb_mask, cur_cpu));
+
+	set_bau_off();
+	nobau_perm = 1;
+
+	return -EINVAL;
 }
 core_initcall(uv_bau_init);
 fs_initcall(uv_ptc_init);
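
(The congested_response_us fix works by dropping the congested_cycles
value that was computed once at init time and converting
bcp->cong_response_us at the point of use instead, so later changes to
the tunable actually take effect.)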

* [GIT PULL] x86 fixes
@ 2017-06-10  9:03 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-06-10  9:03 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 5b0bc9ac2ce4881ee318a21f31140584ce4dbdad x86/microcode/intel: Clear patch pointer before jettisoning the initrd

Misc fixes: a Geode fix plus a microcode loader fix.

 Thanks,

	Ingo

------------------>
Christian Sünkenberg (1):
      x86/cpu/cyrix: Add alternative Device ID of Geode GX1 SoC

Dominik Brodowski (1):
      x86/microcode/intel: Clear patch pointer before jettisoning the initrd


 arch/x86/kernel/cpu/cyrix.c           | 1 +
 arch/x86/kernel/cpu/microcode/intel.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index a70fd61095f8..6f077445647a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -255,6 +255,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 		break;
 
 	case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+	case 11: /* GX1 with inverted Device ID */
 #ifdef CONFIG_PCI
 	{
 		u32 vendor, device;
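
(On the "inverted Device ID", a hedged note: the GX1's DIR0 model nibble
is normally 0x4, and some parts apparently report it with the bits
flipped -- ~0x4 & 0xf == 0xb == 11 -- hence the extra case label.)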
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index afdfd237b59f..f522415bf9e5 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -619,6 +619,9 @@ int __init save_microcode_in_initrd_intel(void)
 
 	show_saved_mc();
 
+	/* initrd is going away, clear patch ptr. */
+	intel_ucode_patch = NULL;
+
 	return 0;
 }
 

* [GIT PULL] x86 fixes
@ 2017-06-02  6:54 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-06-02  6:54 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: c08d517480ea342cc43acdacc5cf4a795e18151d Revert "x86/PAT: Fix Xorg regression on CPUs that don't support PAT"

Misc fixes:

 - revert a broken PAT commit that broke a number of systems,

 - fix two preemptibility warnings/bugs that can trigger under certain 
   circumstances, in the debug code and in the microcode loader.
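
A note on the rule both fixes revolve around, sketched in kernel idiom
-- do_percpu_work() is a placeholder, not a real API: with
CONFIG_DEBUG_PREEMPT, smp_processor_id() warns when called from
preemptible context, because the task may migrate and the returned CPU
number go stale before it is used.

	static void example(void)
	{
		int cpu;

		preempt_disable();
		cpu = smp_processor_id();	/* safe: no migration here */
		do_percpu_work(cpu);
		preempt_enable();

		/* In a pure diagnostic, a possibly-stale value is fine: */
		pr_info("on CPU %d\n", raw_smp_processor_id());
	}

The debug-code fix takes the second route; the microcode fix sidesteps
the CPU number entirely by passing a 'save this patch?' bool down.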

 Thanks,

	Ingo

------------------>
Borislav Petkov (2):
      x86/microcode/AMD: Change load_microcode_amd()'s param to bool to fix preemptibility bug
      x86/debug/32: Convert a smp_processor_id() call to raw to avoid DEBUG_PREEMPT warning

Ingo Molnar (1):
      Revert "x86/PAT: Fix Xorg regression on CPUs that don't support PAT"


 arch/x86/kernel/cpu/microcode/amd.c | 16 ++++++++--------
 arch/x86/kernel/process_32.c        |  2 +-
 arch/x86/mm/pat.c                   |  9 +++------
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 45db4d2ebd01..e9f4d762aa5b 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
 }
 
 static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
 
 int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 {
@@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 	if (!desc.mc)
 		return -EINVAL;
 
-	ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
-				 desc.data, desc.size);
+	ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
 	if (ret != UCODE_OK)
 		return -EINVAL;
 
@@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
 }
 
 static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
 {
 	enum ucode_state ret;
 
@@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
 
 #ifdef CONFIG_X86_32
 	/* save BSP's matching patch for early load */
-	if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
-		struct ucode_patch *p = find_patch(cpu);
+	if (save) {
+		struct ucode_patch *p = find_patch(0);
 		if (p) {
 			memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
 			memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
@@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 {
 	char fw_name[36] = "amd-ucode/microcode_amd.bin";
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
 	enum ucode_state ret = UCODE_NFOUND;
 	const struct firmware *fw;
 
 	/* reload ucode container only on the boot cpu */
-	if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+	if (!refresh_fw || !bsp)
 		return UCODE_OK;
 
 	if (c->x86 >= 0x15)
@@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 		goto fw_release;
 	}
 
-	ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
+	ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
 
  fw_release:
 	release_firmware(fw);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ff40e74c9181..ffeae818aa7a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
 	printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
-		smp_processor_id());
+		raw_smp_processor_id());
 
 	printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->ax, regs->bx, regs->cx, regs->dx);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 83a59a67757a..9b78685b66e6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -65,11 +65,9 @@ static int __init nopat(char *str)
 }
 early_param("nopat", nopat);
 
-static bool __read_mostly __pat_initialized = false;
-
 bool pat_enabled(void)
 {
-	return __pat_initialized;
+	return !!__pat_enabled;
 }
 EXPORT_SYMBOL_GPL(pat_enabled);
 
@@ -227,14 +225,13 @@ static void pat_bsp_init(u64 pat)
 	}
 
 	wrmsrl(MSR_IA32_CR_PAT, pat);
-	__pat_initialized = true;
 
 	__init_cache_modes(pat);
 }
 
 static void pat_ap_init(u64 pat)
 {
-	if (!this_cpu_has(X86_FEATURE_PAT)) {
+	if (!boot_cpu_has(X86_FEATURE_PAT)) {
 		/*
 		 * If this happens we are on a secondary CPU, but switched to
 		 * PAT on the boot CPU. We have no way to undo PAT.
@@ -309,7 +306,7 @@ void pat_init(void)
 	u64 pat;
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
-	if (!__pat_enabled) {
+	if (!pat_enabled()) {
 		init_cache_modes();
 		return;
 	}

* [GIT PULL] x86 fixes
@ 2017-05-12  7:39 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-05-12  7:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: fb8fb46c56289b3f34b5d90a4ec65e9e4e4544a5 x86/intel_rdt: Fix a typo in Documentation

It's mostly misc fixes:

 - two boot crash fixes
 - unwinder fixes
 - kexec-related kernel direct-mapping enhancements/fixes
 - more Clang support quirks
 - minor cleanups
 - Documentation fixes

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/boot/32: Fix UP boot on Quark and possibly other platforms

Baoquan He (1):
      x86/mm: Fix boot crash caused by incorrect loop count calculation in sync_global_pgds()

Colin Ian King (1):
      x86/microcode/AMD: Remove redundant NULL check on mc

Josh Poimboeuf (1):
      x86/asm: Don't use RBP as a temporary register in csum_partial_copy_generic()

Kees Cook (1):
      x86/boot: Declare error() as noreturn

Laura Abbott (1):
      x86/mm/32: Set the '__vmalloc_start_set' flag in initmem_init()

Matthias Kaehlcke (1):
      x86/mm/kaslr: Use the _ASM_MUL macro for multiplication to work around Clang incompatibility

Nick Desaulniers (1):
      x86/build: Don't add -maccumulate-outgoing-args w/o compiler support

Xiaochen Shen (1):
      x86/intel_rdt: Fix a typo in Documentation

Xunlei Pang (2):
      x86/mm: Add support for gbpages to kernel_ident_mapping_init()
      x86/kexec/64: Use gbpages for identity mappings if available


 Documentation/x86/intel_rdt_ui.txt   |  2 +-
 arch/x86/Makefile                    |  3 ++-
 arch/x86/boot/compressed/error.h     |  4 +++-
 arch/x86/boot/compressed/pagetable.c |  2 +-
 arch/x86/include/asm/asm.h           |  1 +
 arch/x86/include/asm/init.h          |  3 ++-
 arch/x86/kernel/cpu/microcode/amd.c  |  2 --
 arch/x86/kernel/machine_kexec_64.c   |  6 +++++-
 arch/x86/kernel/setup.c              | 15 +++++++++++++++
 arch/x86/kernel/setup_percpu.c       | 10 +++++-----
 arch/x86/lib/csum-copy_64.S          | 12 ++++++------
 arch/x86/lib/kaslr.c                 |  3 ++-
 arch/x86/mm/ident_map.c              | 14 +++++++++++++-
 arch/x86/mm/init_64.c                | 12 ++++++------
 arch/x86/mm/numa_32.c                |  1 +
 arch/x86/power/hibernate_64.c        |  2 +-
 16 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 0f6d8477b66c..c491a1b82de2 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -295,7 +295,7 @@ kernel and the tasks running there get 50% of the cache. They should
 also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
 siblings and only the real time threads are scheduled on the cores 4-7.
 
-# echo C0 > p0/cpus
+# echo F0 > p0/cpus
 
 4) Locking between applications
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4430dd489620..5851411e60fb 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -179,7 +179,8 @@ ifdef CONFIG_JUMP_LABEL
 endif
 
 ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
-	KBUILD_CFLAGS += -maccumulate-outgoing-args
+	# This compiler flag is not supported by Clang:
+	KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
 endif
 
 # Stackpointer is addressed different for 32 bit and 64 bit x86
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
index 2e59dac07f9e..d732e608e3af 100644
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -1,7 +1,9 @@
 #ifndef BOOT_COMPRESSED_ERROR_H
 #define BOOT_COMPRESSED_ERROR_H
 
+#include <linux/compiler.h>
+
 void warn(char *m);
-void error(char *m);
+void error(char *m) __noreturn;
 
 #endif /* BOOT_COMPRESSED_ERROR_H */
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 56589d0a804b..1d78f1739087 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -70,7 +70,7 @@ static unsigned long level4p;
  * Due to relocation, pointers must be assigned at run time not build time.
  */
 static struct x86_mapping_info mapping_info = {
-	.pmd_flag       = __PAGE_KERNEL_LARGE_EXEC,
+	.page_flag       = __PAGE_KERNEL_LARGE_EXEC,
 };
 
 /* Locates and clears a region for a new top level page table. */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7acb51c49fec..7a9df3beb89b 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -32,6 +32,7 @@
 #define _ASM_ADD	__ASM_SIZE(add)
 #define _ASM_SUB	__ASM_SIZE(sub)
 #define _ASM_XADD	__ASM_SIZE(xadd)
+#define _ASM_MUL	__ASM_SIZE(mul)
 
 #define _ASM_AX		__ASM_REG(ax)
 #define _ASM_BX		__ASM_REG(bx)
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 737da62bfeb0..474eb8c66fee 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -4,8 +4,9 @@
 struct x86_mapping_info {
 	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
 	void *context;			 /* context for alloc_pgt_page */
-	unsigned long pmd_flag;		 /* page flag for PMD entry */
+	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
+	bool direct_gbpages;		 /* PUD level 1GB page support */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 7889ae492af0..1d38e53c2d5c 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -352,8 +352,6 @@ void reload_ucode_amd(void)
 	u32 rev, dummy;
 
 	mc = (struct microcode_amd *)amd_ucode_patch;
-	if (!mc)
-		return;
 
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 085c3b300d32..c25d277d7d7e 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -113,7 +113,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
-		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
@@ -122,6 +122,10 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 
 	level4p = (pgd_t *)__va(start_pgtable);
 	clear_page(level4p);
+
+	if (direct_gbpages)
+		info.direct_gbpages = true;
+
 	for (i = 0; i < nr_pfn_mapped; i++) {
 		mstart = pfn_mapped[i].start << PAGE_SHIFT;
 		mend   = pfn_mapped[i].end << PAGE_SHIFT;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 603a1669a2ec..0b4d3c686b1e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1225,6 +1225,21 @@ void __init setup_arch(char **cmdline_p)
 
 	kasan_init();
 
+#ifdef CONFIG_X86_32
+	/* sync back kernel address range */
+	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+
+	/*
+	 * sync back low identity map too.  It is used for example
+	 * in the 32-bit EFI stub.
+	 */
+	clone_pgd_range(initial_page_table,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+#endif
+
 	tboot_probe();
 
 	map_vsyscall();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bb1e8cc0bc84..10edd1e69a68 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -291,11 +291,11 @@ void __init setup_per_cpu_areas(void)
 
 #ifdef CONFIG_X86_32
 	/*
-	 * Sync back kernel address range.  We want to make sure that
-	 * all kernel mappings, including percpu mappings, are available
-	 * in the smpboot asm.  We can't reliably pick up percpu
-	 * mappings using vmalloc_fault(), because exception dispatch
-	 * needs percpu data.
+	 * Sync back kernel address range again.  We already did this in
+	 * setup_arch(), but percpu data also needs to be available in
+	 * the smpboot asm.  We can't reliably pick up percpu mappings
+	 * using vmalloc_fault(), because exception dispatch needs
+	 * percpu data.
 	 */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
 			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 7e48807b2fa1..45a53dfe1859 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic)
 	movq  %r12, 3*8(%rsp)
 	movq  %r14, 4*8(%rsp)
 	movq  %r13, 5*8(%rsp)
-	movq  %rbp, 6*8(%rsp)
+	movq  %r15, 6*8(%rsp)
 
 	movq  %r8, (%rsp)
 	movq  %r9, 1*8(%rsp)
@@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic)
 	/* main loop. clear in 64 byte blocks */
 	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
 	/* r11:	temp3, rdx: temp4, r12 loopcnt */
-	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
+	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
 	.p2align 4
 .Lloop:
 	source
@@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic)
 	source
 	movq  32(%rdi), %r10
 	source
-	movq  40(%rdi), %rbp
+	movq  40(%rdi), %r15
 	source
 	movq  48(%rdi), %r14
 	source
@@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic)
 	adcq  %r11, %rax
 	adcq  %rdx, %rax
 	adcq  %r10, %rax
-	adcq  %rbp, %rax
+	adcq  %r15, %rax
 	adcq  %r14, %rax
 	adcq  %r13, %rax
 
@@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic)
 	dest
 	movq %r10, 32(%rsi)
 	dest
-	movq %rbp, 40(%rsi)
+	movq %r15, 40(%rsi)
 	dest
 	movq %r14, 48(%rsi)
 	dest
@@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic)
 	movq 3*8(%rsp), %r12
 	movq 4*8(%rsp), %r14
 	movq 5*8(%rsp), %r13
-	movq 6*8(%rsp), %rbp
+	movq 6*8(%rsp), %r15
 	addq $7*8, %rsp
 	ret
 
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 5761a4f19455..ab2d1d73e9e7 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -5,6 +5,7 @@
  * kernel starts. This file is included in the compressed kernel and
  * normally linked in the regular.
  */
+#include <asm/asm.h>
 #include <asm/kaslr.h>
 #include <asm/msr.h>
 #include <asm/archrandom.h>
@@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(const char *purpose)
 	}
 
 	/* Circular multiply for better bit diffusion */
-	asm("mul %3"
+	asm(_ASM_MUL "%3"
 	    : "=a" (random), "=d" (raw)
 	    : "a" (random), "rm" (mix_const));
 	random += raw;
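
(What the new macro buys: __ASM_SIZE() appends the operand-size suffix,
so this asm assembles as "mulq %3" on 64-bit and "mull %3" on 32-bit.
With the bare "mul %3", the "rm" constraint lets the compiler pick a
memory operand, and a size-less mnemonic on a memory operand is
ambiguous: GCC happens to pick a register, where the size is implied,
but Clang can pick memory and its integrated assembler then rejects the
instruction.)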
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 04210a29dd60..adab1595f4bd 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
 		if (pmd_present(*pmd))
 			continue;
 
-		set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
+		set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
 	}
 }
 
@@ -30,6 +30,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (next > end)
 			next = end;
 
+		if (info->direct_gbpages) {
+			pud_t pudval;
+
+			if (pud_present(*pud))
+				continue;
+
+			addr &= PUD_MASK;
+			pudval = __pud((addr - info->offset) | info->page_flag);
+			set_pud(pud, pudval);
+			continue;
+		}
+
 		if (pud_present(*pud)) {
 			pmd = pmd_offset(pud, 0);
 			ident_pmd_init(info, pmd, addr, next);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 745e5e183169..97fe88749e18 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -94,10 +94,10 @@ __setup("noexec32=", nonx32_setup);
  */
 void sync_global_pgds(unsigned long start, unsigned long end)
 {
-	unsigned long address;
+	unsigned long addr;
 
-	for (address = start; address <= end; address += PGDIR_SIZE) {
-		pgd_t *pgd_ref = pgd_offset_k(address);
+	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+		pgd_t *pgd_ref = pgd_offset_k(addr);
 		const p4d_t *p4d_ref;
 		struct page *page;
 
@@ -106,7 +106,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		 * handle synchonization on p4d level.
 		 */
 		BUILD_BUG_ON(pgd_none(*pgd_ref));
-		p4d_ref = p4d_offset(pgd_ref, address);
+		p4d_ref = p4d_offset(pgd_ref, addr);
 
 		if (p4d_none(*p4d_ref))
 			continue;
@@ -117,8 +117,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 			p4d_t *p4d;
 			spinlock_t *pgt_lock;
 
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			p4d = p4d_offset(pgd, address);
+			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			p4d = p4d_offset(pgd, addr);
 			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
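
The loop-count bug, illustrated: stepping by PGDIR_SIZE from an
unaligned 'start' can jump clear over the PGD entry that covers 'end',
so that entry never gets synchronized -- hence the boot crash once KASLR
handed in unaligned ranges. A small runnable demonstration for a 64-bit
host; PGDIR_SIZE and ALIGN() mirror the kernel definitions:

	#include <stdio.h>

	#define PGDIR_SIZE	(1UL << 39)
	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		unsigned long start = PGDIR_SIZE - 0x1000;	/* unaligned */
		unsigned long end   = PGDIR_SIZE + 0x1000;
		unsigned long addr;

		/* old loop: visits pgd 0 only and skips the pgd of 'end' */
		for (addr = start; addr <= end; addr += PGDIR_SIZE)
			printf("old: pgd %lu\n", addr >> 39);

		/* fixed loop: also visits pgd 1, which covers 'end' */
		for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE))
			printf("new: pgd %lu\n", addr >> 39);

		return 0;
	}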
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 6b7ce6279133..aca6295350f3 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -100,5 +100,6 @@ void __init initmem_init(void)
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
 
+	__vmalloc_start_set = true;
 	setup_bootmem_allocator();
 }
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 6a61194ffd58..a6e21fee22ea 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void)
 {
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
-		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
 		.offset		= __PAGE_OFFSET,
 	};
 	unsigned long mstart, mend;

* [GIT PULL] x86 fixes
@ 2017-03-07 20:40 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-03-07 20:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: f2853308b6409b97799b0beceacd9da43a82fe43 x86/build/x86_64_defconfig: Enable CONFIG_R8169

Misc fixes and minor updates all over the place:

- an SGI/UV fix,
- a defconfig update,
- a build warning fix,
- move the boot_params file to the arch location in debugfs,
- a pkeys fix,
- selftests fix,
- boot message fixes,
- sparse fixes,
- a resume warning fix,
- ioapic hotplug fixes,
- reboot quirks,
- ... plus various minor cleanups.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/acpi/internal.h            # f2ae5da72617: x86/ioapic: Split IOAPIC hot
  drivers/acpi/ioapic.c              # f2ae5da72617: x86/ioapic: Split IOAPIC hot
  drivers/acpi/pci_root.c            # f2ae5da72617: x86/ioapic: Split IOAPIC hot
  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c# 940b2f2fd963: x86/events: Remove last remn
  tools/testing/selftests/x86/fsgsbase.c# 2a4d0c627f53: x86/selftests: Add clobbers 
  tools/testing/selftests/x86/ldt_gdt.c# 2a4d0c627f53: x86/selftests: Add clobbers 
  tools/testing/selftests/x86/ptrace_syscall.c# 2a4d0c627f53: x86/selftests: Add clobbers 
  tools/testing/selftests/x86/single_step_syscall.c# 2a4d0c627f53: x86/selftests: Add clobbers 

 Thanks,

	Ingo

------------------>
Andrew Banman (1):
      x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack register

Andy Shevchenko (1):
      x86/build/x86_64_defconfig: Enable CONFIG_R8169

Arnd Bergmann (1):
      x86/hyperv: Hide unused label

Borislav Petkov (3):
      x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/
      x86/events: Remove last remnants of old filenames
      x86/boot: Correct setup_header.start_sys name

Dave Hansen (1):
      x86/pkeys: Check against max pkey to avoid overflows

Dmitry Safonov (1):
      x86/selftests: Add clobbers for int80 on x86_64

Dou Liyang (2):
      x86/apic: Fix a warning message in logical CPU IDs allocation
      x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR()

Masanari Iida (2):
      x86/vmware: Remove duplicate inclusion of asm/timer.h
      x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h

Matjaz Hegedic (2):
      x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk
      x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk

Rui Wang (2):
      x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC
      x86/ioapic: Split IOAPIC hot-removal into two steps

Thomas Gleixner (1):
      x86/hpet: Prevent might sleep splat on resume

Tobin C. Harding (2):
      x86/purgatory: Make functions and variables static
      x86/purgatory: Fix sparse warning, symbol not declared


 arch/x86/configs/x86_64_defconfig                  |  1 +
 arch/x86/events/amd/core.c                         |  2 +-
 arch/x86/events/intel/cstate.c                     |  2 +-
 arch/x86/events/intel/rapl.c                       |  2 +-
 arch/x86/events/intel/uncore.h                     |  6 +++---
 arch/x86/hyperv/hv_init.c                          |  2 +-
 arch/x86/include/asm/pkeys.h                       | 15 ++++++++------
 arch/x86/include/uapi/asm/bootparam.h              |  2 +-
 arch/x86/kernel/apic/apic.c                        | 23 +++++++---------------
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c           |  1 -
 arch/x86/kernel/cpu/vmware.c                       |  1 -
 arch/x86/kernel/hpet.c                             |  2 +-
 arch/x86/kernel/kdebugfs.c                         |  2 +-
 arch/x86/kernel/reboot.c                           | 16 +++++++++++++++
 arch/x86/pci/common.c                              |  9 +++++++++
 arch/x86/platform/uv/tlb_uv.c                      |  1 -
 arch/x86/purgatory/purgatory.c                     | 11 ++++++-----
 arch/x86/purgatory/purgatory.h                     |  8 ++++++++
 arch/x86/purgatory/setup-x86_64.S                  |  1 +
 drivers/acpi/internal.h                            |  2 ++
 drivers/acpi/ioapic.c                              | 22 +++++++++++++++------
 drivers/acpi/pci_root.c                            |  4 ++--
 .../util/intel-pt-decoder/intel-pt-insn-decoder.c  |  2 +-
 tools/testing/selftests/x86/fsgsbase.c             |  2 +-
 tools/testing/selftests/x86/ldt_gdt.c              | 16 ++++++++++-----
 tools/testing/selftests/x86/ptrace_syscall.c       |  3 ++-
 tools/testing/selftests/x86/single_step_syscall.c  |  5 ++++-
 27 files changed, 106 insertions(+), 57 deletions(-)
 create mode 100644 arch/x86/purgatory/purgatory.h

diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 7ef4a099defc..6205d3b81e6d 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -176,6 +176,7 @@ CONFIG_E1000E=y
 CONFIG_SKY2=y
 CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y
+CONFIG_R8169=y
 CONFIG_FDDI=y
 CONFIG_INPUT_POLLDEV=y
 # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index afb222b63cae..c84584bb9402 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
 			return &amd_f15_PMC20;
 		}
 	case AMD_EVENT_NB:
-		/* moved to perf_event_amd_uncore.c */
+		/* moved to uncore.c */
 		return &emptyconstraint;
 	default:
 		return &emptyconstraint;
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index aff4b5b69d40..238ae3248ba5 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -1,5 +1,5 @@
 /*
- * perf_event_intel_cstate.c: support cstate residency counters
+ * Support cstate residency counters
  *
  * Copyright (C) 2015, Intel Corp.
  * Author: Kan Liang (kan.liang@intel.com)
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 22054ca49026..9d05c7e67f60 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -1,5 +1,5 @@
 /*
- * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Support Intel RAPL energy consumption counters
  * Copyright (C) 2013 Google, Inc., Stephane Eranian
  *
  * Intel RAPL interface is specified in the IA-32 Manual Vol3b
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index ad986c1e29bc..df5989f27b1b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head;
 extern struct pci_extra_dev *uncore_extra_pci_dev;
 extern struct event_constraint uncore_constraint_empty;
 
-/* perf_event_intel_uncore_snb.c */
+/* uncore_snb.c */
 int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
@@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void);
 void skl_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
 
-/* perf_event_intel_uncore_snbep.c */
+/* uncore_snbep.c */
 int snbep_uncore_pci_init(void);
 void snbep_uncore_cpu_init(void);
 int ivbep_uncore_pci_init(void);
@@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void);
 int skx_uncore_pci_init(void);
 void skx_uncore_cpu_init(void);
 
-/* perf_event_intel_uncore_nhmex.c */
+/* uncore_nhmex.c */
 void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index db64baf0e500..8bef70e7f3cc 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -158,13 +158,13 @@ void hyperv_init(void)
 		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 		return;
 	}
+register_msr_cs:
 #endif
 	/*
 	 * For 32 bit guests just use the MSR based mechanism for reading
 	 * the partition counter.
 	 */
 
-register_msr_cs:
 	hyperv_cs = &hyperv_cs_msr;
 	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
 		clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 34684adb6899..b3b09b98896d 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline
 bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 {
+	/*
+	 * "Allocated" pkeys are those that have been returned
+	 * from pkey_alloc().  pkey 0 is special, and never
+	 * returned from pkey_alloc().
+	 */
+	if (pkey <= 0)
+		return false;
+	if (pkey >= arch_max_pkey())
+		return false;
 	return mm_pkey_allocation_map(mm) & (1U << pkey);
 }
 
@@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm)
 static inline
 int mm_pkey_free(struct mm_struct *mm, int pkey)
 {
-	/*
-	 * pkey 0 is special, always allocated and can never
-	 * be freed.
-	 */
-	if (!pkey)
-		return -EINVAL;
 	if (!mm_pkey_is_allocated(mm, pkey))
 		return -EINVAL;
 
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 5138dacf8bb8..07244ea16765 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -58,7 +58,7 @@ struct setup_header {
 	__u32	header;
 	__u16	version;
 	__u32	realmode_swtch;
-	__u16	start_sys;
+	__u16	start_sys_seg;
 	__u16	kernel_version;
 	__u8	type_of_loader;
 	__u8	loadflags;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4261b3282ad9..aee7deddabd0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { }
 static inline void __x2apic_enable(void) { }
 #endif /* !CONFIG_X86_X2APIC */
 
-static int __init try_to_enable_IR(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	if (!x2apic_enabled() && skip_ioapic_setup) {
-		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
-		return -1;
-	}
-#endif
-	return irq_remapping_enable();
-}
-
 void __init enable_IR_x2apic(void)
 {
 	unsigned long flags;
 	int ret, ir_stat;
 
-	if (skip_ioapic_setup)
+	if (skip_ioapic_setup) {
+		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
 		return;
+	}
 
 	ir_stat = irq_remapping_prepare();
 	if (ir_stat < 0 && !x2apic_supported())
@@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void)
 
 	/* If irq_remapping_prepare() succeeded, try to enable it */
 	if (ir_stat >= 0)
-		ir_stat = try_to_enable_IR();
+		ir_stat = irq_remapping_enable();
 	/* ir_stat contains the remap mode or an error code */
 	try_to_enable_x2apic(ir_stat);
 
@@ -2062,10 +2053,10 @@ static int allocate_logical_cpuid(int apicid)
 
 	/* Allocate a new cpuid. */
 	if (nr_logical_cpuids >= nr_cpu_ids) {
-		WARN_ONCE(1, "Only %d processors supported."
+		WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. "
 			     "Processor %d/0x%x and the rest are ignored.\n",
-			     nr_cpu_ids - 1, nr_logical_cpuids, apicid);
-		return -1;
+			     nr_cpu_ids, nr_logical_cpuids, apicid);
+		return -EINVAL;
 	}
 
 	cpuid_to_apicid[nr_logical_cpuids] = apicid;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 8af04afdfcb9..759577d9d166 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -27,7 +27,6 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/cpu.h>
 #include <linux/task_work.h>
 
 #include <uapi/linux/magic.h>
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 891f4dad7b2c..22403a28caf5 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -30,7 +30,6 @@
 #include <asm/hypervisor.h>
 #include <asm/timer.h>
 #include <asm/apic.h>
-#include <asm/timer.h>
 
 #undef pr_fmt
 #define pr_fmt(fmt)	"vmware: " fmt
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index dc6ba5bda9fc..89ff7af2de50 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
 
 		irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
 		irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
-		disable_irq(hdev->irq);
+		disable_hardirq(hdev->irq);
 		irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 		enable_irq(hdev->irq);
 	}
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index bdb83e431d89..38b64587b31b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void)
 	struct dentry *dbp, *version, *data;
 	int error = -ENOMEM;
 
-	dbp = debugfs_create_dir("boot_params", NULL);
+	dbp = debugfs_create_dir("boot_params", arch_debugfs_dir);
 	if (!dbp)
 		return -ENOMEM;
 
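Note on the kdebugfs change above: assuming debugfs is mounted at the
usual /sys/kernel/debug, the boot_params hierarchy moves from the
debugfs root into the per-arch x86 directory:

    $ ls /sys/kernel/debug/x86/boot_params
    data  version
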
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e244c19a2451..4194d6f9bb29 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -223,6 +223,22 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
+	{	/* Handle problems with rebooting on ASUS EeeBook X205TA */
+		.callback = set_acpi_reboot,
+		.ident = "ASUS EeeBook X205TA",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"),
+		},
+	},
+	{	/* Handle problems with rebooting on ASUS EeeBook X205TAW */
+		.callback = set_acpi_reboot,
+		.ident = "ASUS EeeBook X205TAW",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+		},
+	},
 
 	/* Certec */
 	{       /* Handle problems with rebooting on Certec BPC600 */
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 0cb52ae0a8f0..190e718694b1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev)
 		pcibios_disable_irq(dev);
 }
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+void pcibios_release_device(struct pci_dev *dev)
+{
+	if (atomic_dec_return(&dev->enable_cnt) >= 0)
+		pcibios_disable_device(dev);
+}
+#endif
+
 int pci_ext_cfg_avail(void)
 {
 	if (raw_pci_ext_ops)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 766d4d3529a1..f25982cdff90 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode)
 
 	ops.write_payload_first(pnode, first);
 	ops.write_payload_last(pnode, last);
-	ops.write_g_sw_ack(pnode, 0xffffUL);
 
 	/* in effect, all msg_type's are set to MSG_NOOP */
 	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 25e068ba3382..b6d5c8946e66 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -11,6 +11,7 @@
  */
 
 #include "sha256.h"
+#include "purgatory.h"
 #include "../boot/string.h"
 
 struct sha_region {
@@ -18,11 +19,11 @@ struct sha_region {
 	unsigned long len;
 };
 
-unsigned long backup_dest = 0;
-unsigned long backup_src = 0;
-unsigned long backup_sz = 0;
+static unsigned long backup_dest;
+static unsigned long backup_src;
+static unsigned long backup_sz;
 
-u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
 
 struct sha_region sha_regions[16] = {};
 
@@ -39,7 +40,7 @@ static int copy_backup_region(void)
 	return 0;
 }
 
-int verify_sha256_digest(void)
+static int verify_sha256_digest(void)
 {
 	struct sha_region *ptr, *end;
 	u8 digest[SHA256_DIGEST_SIZE];
diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h
new file mode 100644
index 000000000000..e2e365a6c192
--- /dev/null
+++ b/arch/x86/purgatory/purgatory.h
@@ -0,0 +1,8 @@
+#ifndef PURGATORY_H
+#define PURGATORY_H
+
+#ifndef __ASSEMBLY__
+extern void purgatory(void);
+#endif	/* __ASSEMBLY__ */
+
+#endif /* PURGATORY_H */
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index fe3c91ba1bd0..f90e9dfa90bb 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -9,6 +9,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
+#include "purgatory.h"
 
 	.text
 	.globl purgatory_start
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 219b90bc0922..f15900132912 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -41,8 +41,10 @@ void acpi_gpe_apply_masked_gpes(void);
 void acpi_container_init(void);
 void acpi_memory_hotplug_init(void);
 #ifdef	CONFIG_ACPI_HOTPLUG_IOAPIC
+void pci_ioapic_remove(struct acpi_pci_root *root);
 int acpi_ioapic_remove(struct acpi_pci_root *root);
 #else
+static inline void pci_ioapic_remove(struct acpi_pci_root *root) { return; }
 static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; }
 #endif
 #ifdef CONFIG_ACPI_DOCK
diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c
index 6d7ce6e12aaa..1120dfd625b8 100644
--- a/drivers/acpi/ioapic.c
+++ b/drivers/acpi/ioapic.c
@@ -206,24 +206,34 @@ int acpi_ioapic_add(acpi_handle root_handle)
 	return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV;
 }
 
-int acpi_ioapic_remove(struct acpi_pci_root *root)
+void pci_ioapic_remove(struct acpi_pci_root *root)
 {
-	int retval = 0;
 	struct acpi_pci_ioapic *ioapic, *tmp;
 
 	mutex_lock(&ioapic_list_lock);
 	list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) {
 		if (root->device->handle != ioapic->root_handle)
 			continue;
-
-		if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base))
-			retval = -EBUSY;
-
 		if (ioapic->pdev) {
 			pci_release_region(ioapic->pdev, 0);
 			pci_disable_device(ioapic->pdev);
 			pci_dev_put(ioapic->pdev);
 		}
+	}
+	mutex_unlock(&ioapic_list_lock);
+}
+
+int acpi_ioapic_remove(struct acpi_pci_root *root)
+{
+	int retval = 0;
+	struct acpi_pci_ioapic *ioapic, *tmp;
+
+	mutex_lock(&ioapic_list_lock);
+	list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) {
+		if (root->device->handle != ioapic->root_handle)
+			continue;
+		if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base))
+			retval = -EBUSY;
 		if (ioapic->res.flags && ioapic->res.parent)
 			release_resource(&ioapic->res);
 		list_del(&ioapic->list);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index bf601d4df8cf..919be0aa2578 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -648,12 +648,12 @@ static void acpi_pci_root_remove(struct acpi_device *device)
 
 	pci_stop_root_bus(root->bus);
 
-	WARN_ON(acpi_ioapic_remove(root));
-
+	pci_ioapic_remove(root);
 	device_set_run_wake(root->bus->bridge, false);
 	pci_acpi_remove_bus_pm_notifier(device);
 
 	pci_remove_root_bus(root->bus);
+	WARN_ON(acpi_ioapic_remove(root));
 
 	dmar_device_remove(device->handle);
 
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 7913363bde5c..4f3c758d875d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -31,7 +31,7 @@
 #error Instruction buffer size too small
 #endif
 
-/* Based on branch_type() from perf_event_intel_lbr.c */
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
 static void intel_pt_insn_decoder(struct insn *insn,
 				  struct intel_pt_insn *intel_pt_insn)
 {
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 5b2b4b3c634c..b4967d875236 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -245,7 +245,7 @@ void do_unexpected_base(void)
 		long ret;
 		asm volatile ("int $0x80"
 			      : "=a" (ret) : "a" (243), "b" (low_desc)
-			      : "flags");
+			      : "r8", "r9", "r10", "r11");
 		memcpy(&desc, low_desc, sizeof(desc));
 		munmap(low_desc, sizeof(desc));
 
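Background on the int80 clobber changes in this and the following
selftest diffs (a summary sketch, not text from the patches): the 64-bit
kernel does not preserve r8-r11 across the int $0x80 compat entry, so
64-bit test code issuing it must mark those registers as clobbered,
along these lines:

	long ret;
	asm volatile ("int $0x80"
		      : "=a" (ret)
		      : "a" (SYS_getpid)	/* any syscall number */
		      : "r8", "r9", "r10", "r11");
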
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 4af47079cf04..f6121612e769 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -45,6 +45,12 @@
 #define AR_DB			(1 << 22)
 #define AR_G			(1 << 23)
 
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
 static int nerrs;
 
 /* Points to an array of 1024 ints, each holding its own index. */
@@ -588,7 +594,7 @@ static int invoke_set_thread_area(void)
 	asm volatile ("int $0x80"
 		      : "=a" (ret), "+m" (low_user_desc) :
 			"a" (243), "b" (low_user_desc)
-		      : "flags");
+		      : INT80_CLOBBERS);
 	return ret;
 }
 
@@ -657,7 +663,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 	if (sel != 0) {
 		result = "FAIL";
@@ -688,7 +694,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 	if (sel != 0) {
 		result = "FAIL";
@@ -721,7 +727,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 #ifdef __x86_64__
 	syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
@@ -774,7 +780,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 #ifdef __x86_64__
 	syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index b037ce9cf116..eaea92439708 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args)
 	asm volatile ("int $0x80"
 		      : "+a" (args->nr),
 			"+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
-			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
+			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+			: : "r8", "r9", "r10", "r11");
 	args->arg5 = bp;
 #else
 	sys32_helper(args, int80_and_ret);
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 50c26358e8b7..a48da95c18fd 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps;
 #ifdef __x86_64__
 # define REG_IP REG_RIP
 # define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
 #else
 # define REG_IP REG_EIP
 # define WIDTH "l"
+# define INT80_CLOBBERS
 #endif
 
 static unsigned long get_eflags(void)
@@ -140,7 +142,8 @@ int main()
 
 	printf("[RUN]\tSet TF and check int80\n");
 	set_eflags(get_eflags() | X86_EFLAGS_TF);
-	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
+	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+			: INT80_CLOBBERS);
 	check_result();
 
 	/*

* [GIT PULL] x86 fixes
@ 2017-02-28  8:08 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-02-28  8:08 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 8312593a55941a0fae2b09731a4f91d87bd796db Merge branches 'x86/cache', 'x86/debug' and 'x86/irq' into x86/urgent

Two documentation updates, plus a debugging annotation fix.

 Thanks,

	Ingo

------------------>
Daniel Bristot de Oliveira (1):
      x86/irq, trace: Add __irq_entry annotation to x86's platform IRQ handlers

Marcelo Tosatti (1):
      Documentation, x86, resctrl: Recommend locking for resctrlfs

Xunlei Pang (1):
      x86/crash: Update the stale comment in reserve_crashkernel()


 Documentation/x86/intel_rdt_ui.txt       | 114 +++++++++++++++++++++++++++++++
 arch/x86/kernel/apic/apic.c              |   8 +--
 arch/x86/kernel/apic/vector.c            |   2 +-
 arch/x86/kernel/cpu/mcheck/mce_amd.c     |   4 +-
 arch/x86/kernel/cpu/mcheck/therm_throt.c |   6 +-
 arch/x86/kernel/cpu/mcheck/threshold.c   |   4 +-
 arch/x86/kernel/irq.c                    |   4 +-
 arch/x86/kernel/irq_work.c               |   5 +-
 arch/x86/kernel/setup.c                  |   4 +-
 arch/x86/kernel/smp.c                    |  15 ++--
 10 files changed, 144 insertions(+), 22 deletions(-)

diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index d918d268cd72..51cf6fa5591f 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -212,3 +212,117 @@ Finally we move core 4-7 over to the new group and make sure that the
 kernel and the tasks running there get 50% of the cache.
 
 # echo C0 > p0/cpus
+
+4) Locking between applications
+
+Certain operations on the resctrl filesystem, composed of reads/writes
+to/from multiple files, must be atomic.
+
+As an example, the allocation of an exclusive reservation of L3 cache
+involves:
+
+  1. Read the cbmmasks from each directory
+  2. Find a contiguous set of bits in the global CBM bitmask that is not
+     set in any of the directory cbmmasks
+  3. Create a new directory
+  4. Set the bits found in step 2 to the new directory "schemata" file
+
+If two applications attempt to allocate space concurrently then they can
+end up allocating the same bits so the reservations are shared instead of
+exclusive.
+
+To coordinate atomic operations on the resctrlfs and to avoid the problem
+above, the following locking procedure is recommended:
+
+Locking is based on flock, which is available in libc and also as a shell
+script command.
+
+Write lock:
+
+ A) Take flock(LOCK_EX) on /sys/fs/resctrl
+ B) Read/write the directory structure.
+ C) Release the lock: flock(LOCK_UN)
+
+Read lock:
+
+ A) Take flock(LOCK_SH) on /sys/fs/resctrl
+ B) On success, read the directory structure.
+ C) Release the lock: flock(LOCK_UN)
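+
+Note that flock() locks are advisory: they coordinate only applications
+that participate in the protocol; the kernel does not enforce them
+against processes that skip the lock.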
+
+Example with bash:
+
+# Atomically read directory structure
+$ flock -s /sys/fs/resctrl/ find /sys/fs/resctrl
+
+# Read directory contents and create new subdirectory
+
+$ cat create-dir.sh
+find /sys/fs/resctrl/ > output.txt
+mask=$(function-of output.txt)  # function-of is a placeholder for the CBM computation
+mkdir /sys/fs/resctrl/newres/
+echo "$mask" > /sys/fs/resctrl/newres/schemata
+
+$ flock /sys/fs/resctrl/ ./create-dir.sh
+
+Example with C:
+
+/*
+ * Example code to take advisory locks
+ * before accessing the resctrl filesystem
+ */
+#include <sys/file.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void resctrl_take_shared_lock(int fd)
+{
+	int ret;
+
+	/* take shared lock on resctrl filesystem */
+	ret = flock(fd, LOCK_SH);
+	if (ret) {
+		perror("flock");
+		exit(-1);
+	}
+}
+
+void resctrl_take_exclusive_lock(int fd)
+{
+	int ret;
+
+	/* take exclusive lock on resctrl filesystem */
+	ret = flock(fd, LOCK_EX);
+	if (ret) {
+		perror("flock");
+		exit(-1);
+	}
+}
+
+void resctrl_release_lock(int fd)
+{
+	int ret;
+
+	/* release lock on resctrl filesystem */
+	ret = flock(fd, LOCK_UN);
+	if (ret) {
+		perror("flock");
+		exit(-1);
+	}
+}
+
+int main(void)
+{
+	int fd;
+
+	fd = open("/sys/fs/resctrl", O_RDONLY | O_DIRECTORY);
+	if (fd == -1) {
+		perror("open");
+		exit(-1);
+	}
+	resctrl_take_shared_lock(fd);
+	/* code to read directory contents */
+	resctrl_release_lock(fd);
+
+	resctrl_take_exclusive_lock(fd);
+	/* code to read and write directory contents */
+	resctrl_release_lock(fd);
+
+	return 0;
+}
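+
+One way to build and run the example above (assuming a standard gcc
+toolchain and a mounted resctrl filesystem; the file name lock_example.c
+is only illustrative):
+
+$ gcc -o lock_example lock_example.c
+$ sudo ./lock_example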
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5b7e43eff139..30b122987906 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1864,14 +1864,14 @@ static void __smp_spurious_interrupt(u8 vector)
 		"should never happen.\n", vector, smp_processor_id());
 }
 
-__visible void smp_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	__smp_spurious_interrupt(~regs->orig_ax);
 	exiting_irq();
 }
 
-__visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
 {
 	u8 vector = ~regs->orig_ax;
 
@@ -1922,14 +1922,14 @@ static void __smp_error_interrupt(struct pt_regs *regs)
 
 }
 
-__visible void smp_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	__smp_error_interrupt(regs);
 	exiting_irq();
 }
 
-__visible void smp_trace_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	trace_error_apic_entry(ERROR_APIC_VECTOR);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 5d30c5e42bb1..f3557a1eb562 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -559,7 +559,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 		__send_cleanup_vector(data);
 }
 
-asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index a5fd137417a2..9e655292cf10 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -814,14 +814,14 @@ static inline void __smp_deferred_error_interrupt(void)
 	deferred_error_int_vector();
 }
 
-asmlinkage __visible void smp_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
 {
 	entering_irq();
 	__smp_deferred_error_interrupt();
 	exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
 {
 	entering_irq();
 	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 465aca8be009..772d9400930d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -404,14 +404,16 @@ static inline void __smp_thermal_interrupt(void)
 	smp_thermal_vector();
 }
 
-asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry
+smp_thermal_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	__smp_thermal_interrupt();
 	exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry
+smp_trace_thermal_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 9beb092d68a5..bb0e75eed10a 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -23,14 +23,14 @@ static inline void __smp_threshold_interrupt(void)
 	mce_threshold_vector();
 }
 
-asmlinkage __visible void smp_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
 {
 	entering_irq();
 	__smp_threshold_interrupt();
 	exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
 {
 	entering_irq();
 	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7c6e9ffe4424..4d8183b5f113 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -264,7 +264,7 @@ void __smp_x86_platform_ipi(void)
 		x86_platform_ipi_callback();
 }
 
-__visible void smp_x86_platform_ipi(struct pt_regs *regs)
+__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
@@ -315,7 +315,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
 }
 #endif
 
-__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 3512ba607361..275487872be2 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -9,6 +9,7 @@
 #include <linux/hardirq.h>
 #include <asm/apic.h>
 #include <asm/trace/irq_vectors.h>
+#include <linux/interrupt.h>
 
 static inline void __smp_irq_work_interrupt(void)
 {
@@ -16,14 +17,14 @@ static inline void __smp_irq_work_interrupt(void)
 	irq_work_run();
 }
 
-__visible void smp_irq_work_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	__smp_irq_work_interrupt();
 	exiting_irq();
 }
 
-__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	trace_irq_work_entry(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4cfba947d774..eb69b14dbfc8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -575,7 +575,9 @@ static void __init reserve_crashkernel(void)
 	/* 0 means: find the address automatically */
 	if (crash_base <= 0) {
 		/*
-		 *  kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
+		 * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
+		 * as old kexec-tools loads bzImage below that, unless
+		 * "crashkernel=size[KMG],high" is specified.
 		 */
 		crash_base = memblock_find_in_range(CRASH_ALIGN,
 						    high ? CRASH_ADDR_HIGH_MAX
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 68f8cc222f25..d3c66a15bbde 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -259,7 +259,7 @@ static inline void __smp_reschedule_interrupt(void)
 	scheduler_ipi();
 }
 
-__visible void smp_reschedule_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	__smp_reschedule_interrupt();
@@ -268,7 +268,7 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs)
 	 */
 }
 
-__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
 {
 	/*
 	 * Need to call irq_enter() before calling the trace point.
@@ -292,14 +292,15 @@ static inline void __smp_call_function_interrupt(void)
 	inc_irq_stat(irq_call_count);
 }
 
-__visible void smp_call_function_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	__smp_call_function_interrupt();
 	exiting_irq();
 }
 
-__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_trace_call_function_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	trace_call_function_entry(CALL_FUNCTION_VECTOR);
@@ -314,14 +315,16 @@ static inline void __smp_call_function_single_interrupt(void)
 	inc_irq_stat(irq_call_count);
 }
 
-__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_call_function_single_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	__smp_call_function_single_interrupt();
 	exiting_irq();
 }
 
-__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_trace_call_function_single_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);

* [GIT PULL] x86 fixes
@ 2017-02-11 18:18 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-02-11 18:18 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 146fbb766934dc003fcbf755b519acef683576bf x86/mm/ptdump: Fix soft lockup in page table walker

Last-minute x86 fixes:

 - Fix a softlockup detector warning and long delays if using ptdump with KASAN
   enabled.

 - Two more TSC-adjust fixes for interesting firmware interactions.

 - Two commits to fix an AMD CPU topology enumeration bug that caused a 
   measurable gaming performance regression.

 Thanks,

	Ingo

------------------>
Andrey Ryabinin (1):
      x86/mm/ptdump: Fix soft lockup in page table walker

Borislav Petkov (1):
      x86/CPU/AMD: Bring back Compute Unit ID

Thomas Gleixner (2):
      x86/tsc: Avoid the large time jump when sanitizing TSC ADJUST
      x86/tsc: Make the TSC ADJUST sanitizing work for tsc_reliable

Yazen Ghannam (1):
      x86/CPU/AMD: Fix Zen SMT topology


 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/amd.c        | 16 +++++++++++++++-
 arch/x86/kernel/cpu/common.c     |  1 +
 arch/x86/kernel/smpboot.c        | 12 +++++++++---
 arch/x86/kernel/tsc.c            |  5 +++--
 arch/x86/kernel/tsc_sync.c       | 16 +++++++---------
 arch/x86/mm/dump_pagetables.c    |  2 ++
 7 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1be64da0384e..e6cfe7ba2d65 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -104,6 +104,7 @@ struct cpuinfo_x86 {
 	__u8			x86_phys_bits;
 	/* CPUID returned core id bits: */
 	__u8			x86_coreid_bits;
+	__u8			cu_id;
 	/* Max extended CPUID function supported: */
 	__u32			extended_cpuid_level;
 	/* Maximum supported CPUID level, -1=no CPUID: */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1d3167269a67..2b4cf04239b6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,8 +309,22 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 
 	/* get information required for multi-node processors */
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		u32 eax, ebx, ecx, edx;
 
-		node_id = cpuid_ecx(0x8000001e) & 7;
+		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+
+		node_id  = ecx & 0xff;
+		smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
+
+		if (c->x86 == 0x15)
+			c->cu_id = ebx & 0xff;
+
+		if (c->x86 >= 0x17) {
+			c->cpu_core_id = ebx & 0xff;
+
+			if (smp_num_siblings > 1)
+				c->x86_max_cores /= smp_num_siblings;
+		}
 
 		/*
 		 * We may have multiple LLCs if L3 caches exist, so check if we
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bab7a8a4293..ede03e849a8b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1015,6 +1015,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model_id[0] = '\0';  /* Unset */
 	c->x86_max_cores = 1;
 	c->x86_coreid_bits = 0;
+	c->cu_id = 0xff;
 #ifdef CONFIG_X86_64
 	c->x86_clflush_size = 64;
 	c->x86_phys_bits = 36;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..99b920d0e516 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -433,9 +433,15 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
 
 		if (c->phys_proc_id == o->phys_proc_id &&
-		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
-		    c->cpu_core_id == o->cpu_core_id)
-			return topology_sane(c, o, "smt");
+		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
+			if (c->cpu_core_id == o->cpu_core_id)
+				return topology_sane(c, o, "smt");
+
+			if ((c->cu_id != 0xff) &&
+			    (o->cu_id != 0xff) &&
+			    (c->cu_id == o->cu_id))
+				return topology_sane(c, o, "smt");
+		}
 
 	} else if (c->phys_proc_id == o->phys_proc_id &&
 		   c->cpu_core_id == o->cpu_core_id) {
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e41af597aed8..37e7cf544e51 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1356,6 +1356,9 @@ void __init tsc_init(void)
 		(unsigned long)cpu_khz / 1000,
 		(unsigned long)cpu_khz % 1000);
 
+	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
+	tsc_store_and_check_tsc_adjust(true);
+
 	/*
 	 * Secondary CPUs do not run through tsc_init(), so set up
 	 * all the scale factors for all CPUs, assuming the same
@@ -1386,8 +1389,6 @@ void __init tsc_init(void)
 
 	if (unsynchronized_tsc())
 		mark_tsc_unstable("TSCs unsynchronized");
-	else
-		tsc_store_and_check_tsc_adjust(true);
 
 	check_system_tsc_reliable();
 
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index d0db011051a5..728f75378475 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -286,13 +286,6 @@ void check_tsc_sync_source(int cpu)
 	if (unsynchronized_tsc())
 		return;
 
-	if (tsc_clocksource_reliable) {
-		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
-			pr_info(
-			"Skipped synchronization checks as TSC is reliable.\n");
-		return;
-	}
-
 	/*
 	 * Set the maximum number of test runs to
 	 *  1 if the CPU does not provide the TSC_ADJUST MSR
@@ -380,14 +373,19 @@ void check_tsc_sync_target(void)
 	int cpus = 2;
 
 	/* Also aborts if there is no TSC. */
-	if (unsynchronized_tsc() || tsc_clocksource_reliable)
+	if (unsynchronized_tsc())
 		return;
 
 	/*
 	 * Store, verify and sanitize the TSC adjust register. If
 	 * successful skip the test.
+	 *
+	 * The test is also skipped when the TSC is marked reliable. This
+	 * is true for SoCs which have no fallback clocksource. On these
+	 * SoCs the TSC is frequency synchronized, but still the TSC ADJUST
+	 * register might have been wrecked by the BIOS.
 	 */
-	if (tsc_store_and_check_tsc_adjust(false)) {
+	if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) {
 		atomic_inc(&skip_test);
 		return;
 	}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..8aa6bea1cd6c 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -15,6 +15,7 @@
 #include <linux/debugfs.h>
 #include <linux/mm.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
@@ -406,6 +407,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 		} else
 			note_page(m, &st, __pgprot(0), 1);
 
+		cond_resched();
 		start++;
 	}
 

* [GIT PULL] x86 fixes
@ 2017-02-02 21:04 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-02-02 21:04 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 0becc0ae5b42828785b589f686725ff5bc3b9b25 x86/mce: Make timer handling more robust

Misc fixes:

 - two microcode loader fixes

 - two FPU xstate handling fixes

 - a crash fix in MCE timer handling

 Thanks,

	Ingo

------------------>
Borislav Petkov (2):
      x86/microcode/intel: Drop stashed AP patch pointer optimization
      x86/microcode: Do not access the initrd after it has been freed

Kevin Hao (1):
      x86/fpu: Set the xcomp_bv when we fake up a XSAVES area

Thomas Gleixner (1):
      x86/mce: Make timer handling more robust

Yu-cheng Yu (1):
      x86/fpu/xstate: Fix xcomp_bv in XSAVES header


 arch/x86/include/asm/microcode.h      |  1 +
 arch/x86/kernel/cpu/mcheck/mce.c      | 31 ++++++++++++-------------------
 arch/x86/kernel/cpu/microcode/amd.c   |  5 +++--
 arch/x86/kernel/cpu/microcode/core.c  | 22 +++++++++++++++++-----
 arch/x86/kernel/cpu/microcode/intel.c |  9 +--------
 arch/x86/kernel/fpu/core.c            |  4 +++-
 6 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 38711df3bcb5..2266f864b747 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -140,6 +140,7 @@ extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
 void reload_early_microcode(void);
 extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
+extern bool initrd_gone;
 #else
 static inline int __init microcode_init(void)			{ return 0; };
 static inline void __init load_ucode_bsp(void)			{ }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 00ef43233e03..537c6647d84c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1373,20 +1373,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 
 static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static void __restart_timer(struct timer_list *t, unsigned long interval)
+static void __start_timer(struct timer_list *t, unsigned long interval)
 {
 	unsigned long when = jiffies + interval;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	if (!timer_pending(t) || time_before(when, t->expires))
+		mod_timer(t, round_jiffies(when));
 
 	local_irq_restore(flags);
 }
@@ -1421,7 +1416,7 @@ static void mce_timer_fn(unsigned long data)
 
 done:
 	__this_cpu_write(mce_next_interval, iv);
-	__restart_timer(t, iv);
+	__start_timer(t, iv);
 }
 
 /*
@@ -1432,7 +1427,7 @@ void mce_timer_kick(unsigned long interval)
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	__restart_timer(t, interval);
+	__start_timer(t, interval);
 
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
@@ -1779,17 +1774,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+static void mce_start_timer(struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
 
 	if (mca_cfg.ignore_ce || !iv)
 		return;
 
-	per_cpu(mce_next_interval, cpu) = iv;
-
-	t->expires = round_jiffies(jiffies + iv);
-	add_timer_on(t, cpu);
+	this_cpu_write(mce_next_interval, iv);
+	__start_timer(t, iv);
 }
 
 static void __mcheck_cpu_setup_timer(void)
@@ -1806,7 +1799,7 @@ static void __mcheck_cpu_init_timer(void)
 	unsigned int cpu = smp_processor_id();
 
 	setup_pinned_timer(t, mce_timer_fn, cpu);
-	mce_start_timer(cpu, t);
+	mce_start_timer(t);
 }
 
 /* Handle unconfigured int18 (should never happen) */
@@ -2566,7 +2559,7 @@ static int mce_cpu_dead(unsigned int cpu)
 
 static int mce_cpu_online(unsigned int cpu)
 {
-	struct timer_list *t = &per_cpu(mce_timer, cpu);
+	struct timer_list *t = this_cpu_ptr(&mce_timer);
 	int ret;
 
 	mce_device_create(cpu);
@@ -2577,13 +2570,13 @@ static int mce_cpu_online(unsigned int cpu)
 		return ret;
 	}
 	mce_reenable_cpu();
-	mce_start_timer(cpu, t);
+	mce_start_timer(t);
 	return 0;
 }
 
 static int mce_cpu_pre_down(unsigned int cpu)
 {
-	struct timer_list *t = &per_cpu(mce_timer, cpu);
+	struct timer_list *t = this_cpu_ptr(&mce_timer);
 
 	mce_disable_cpu();
 	del_timer_sync(t);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 6a31e2691f3a..079e81733a58 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -384,8 +384,9 @@ void load_ucode_amd_ap(unsigned int family)
 reget:
 		if (!get_builtin_microcode(&cp, family)) {
 #ifdef CONFIG_BLK_DEV_INITRD
-			cp = find_cpio_data(ucode_path, (void *)initrd_start,
-					    initrd_end - initrd_start, NULL);
+			if (!initrd_gone)
+				cp = find_cpio_data(ucode_path, (void *)initrd_start,
+						    initrd_end - initrd_start, NULL);
 #endif
 			if (!(cp.data && cp.size)) {
 				/*
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 2af69d27da62..73102d932760 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -46,6 +46,8 @@
 static struct microcode_ops	*microcode_ops;
 static bool dis_ucode_ldr = true;
 
+bool initrd_gone;
+
 LIST_HEAD(microcode_cache);
 
 /*
@@ -190,21 +192,24 @@ void load_ucode_ap(void)
 static int __init save_microcode_in_initrd(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
+	int ret = -EINVAL;
 
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		if (c->x86 >= 6)
-			return save_microcode_in_initrd_intel();
+			ret = save_microcode_in_initrd_intel();
 		break;
 	case X86_VENDOR_AMD:
 		if (c->x86 >= 0x10)
-			return save_microcode_in_initrd_amd(c->x86);
+			ret = save_microcode_in_initrd_amd(c->x86);
 		break;
 	default:
 		break;
 	}
 
-	return -EINVAL;
+	initrd_gone = true;
+
+	return ret;
 }
 
 struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
@@ -247,9 +252,16 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
 	 * has the virtual address of the beginning of the initrd. It also
 	 * possibly relocates the ramdisk. In either case, initrd_start contains
 	 * the updated address so use that instead.
+	 *
+	 * initrd_gone is for the hotplug case where we've thrown out initrd
+	 * already.
 	 */
-	if (!use_pa && initrd_start)
-		start = initrd_start;
+	if (!use_pa) {
+		if (initrd_gone)
+			return (struct cpio_data){ NULL, 0, "" };
+		if (initrd_start)
+			start = initrd_start;
+	}
 
 	return find_cpio_data(path, (void *)start, size, NULL);
 #else /* !CONFIG_BLK_DEV_INITRD */
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 3f329b74e040..8325d8a09ab0 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -41,7 +41,7 @@
 
 static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
 
-/* Current microcode patch used in early patching */
+/* Current microcode patch used in early patching on the APs. */
 struct microcode_intel *intel_ucode_patch;
 
 static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
@@ -607,12 +607,6 @@ int __init save_microcode_in_initrd_intel(void)
 	struct ucode_cpu_info uci;
 	struct cpio_data cp;
 
-	/*
-	 * AP loading didn't find any microcode patch, no need to save anything.
-	 */
-	if (!intel_ucode_patch || IS_ERR(intel_ucode_patch))
-		return 0;
-
 	if (!load_builtin_intel_microcode(&cp))
 		cp = find_microcode_in_initrd(ucode_path, false);
 
@@ -628,7 +622,6 @@ int __init save_microcode_in_initrd_intel(void)
 	return 0;
 }
 
-
 /*
  * @res_patch, output: a pointer to the patch we found.
  */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e4e97a5355ce..de7234401275 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -9,6 +9,7 @@
 #include <asm/fpu/regset.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/types.h>
+#include <asm/fpu/xstate.h>
 #include <asm/traps.h>
 
 #include <linux/hardirq.h>
@@ -183,7 +184,8 @@ void fpstate_init(union fpregs_state *state)
 	 * it will #GP. Make sure it is replaced after the memset().
 	 */
 	if (static_cpu_has(X86_FEATURE_XSAVES))
-		state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;
+		state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+					       xfeatures_mask;
 
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);

* [GIT PULL] x86 fixes
@ 2017-01-15 10:06 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2017-01-15 10:06 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Peter Zijlstra

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 453828625731d0ba7218242ef6ec88f59408f368 x86/mpx: Use compatible types in comparison to fix sparse error

Misc fixes:

 - unwinder fixes
 - AMD CPU topology enumeration fixes
 - microcode loader fixes
 - x86 embedded platform fixes
 - a fix for a bootup crash that can trigger when clearcpuid= is used with invalid values
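
 An illustrative example (assumed values, not from these commits): the
 clearcpuid= option takes a CPU feature bit number, and, as the common.c
 hunk below shows, setup_disablecpuid() previously checked only the
 upper bound, so a negative argument could crash the boot:

    clearcpuid=154    # clears an (arbitrary, valid) feature bit
    clearcpuid=-1     # previously crashed: negative index not rejected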

 Thanks,

	Ingo

------------------>
Andy Shevchenko (2):
      x86/cpu: Fix typo in the comment for Anniedale
      x86/platform/intel-mid: Rename 'spidev' to 'mrfld_spidev'

Borislav Petkov (4):
      x86/CPU/AMD: Fix Bulldozer topology
      x86/CPU: Add native CPUID variants returning a single datum
      x86/microcode: Use native CPUID to tickle out microcode revision
      x86/microcode/intel: Add a helper which gives the microcode revision

Josh Poimboeuf (4):
      x86/unwind: Silence warnings for non-current tasks
      x86/unwind: Disable KASAN checks for non-current tasks
      x86/unwind: Include __schedule() in stack traces
      x86/entry: Fix the end of the stack for newly forked tasks

Junichi Nomura (2):
      x86/microcode/intel: Fix allocation size of struct ucode_patch
      x86/microcode/intel: Use correct buffer size for saving microcode data

Len Brown (1):
      x86/tsc: Add the Intel Denverton Processor to native_calibrate_tsc()

Lukasz Odzioba (1):
      x86/cpu: Fix bootup crashes by sanitizing the argument of the 'clearcpuid=' command-line option

Nicholas Mc Guire (1):
      x86/boot: Add missing declaration of string functions

Tobias Klauser (1):
      x86/mpx: Use compatible types in comparison to fix sparse error


 arch/x86/boot/string.c                             |  1 +
 arch/x86/boot/string.h                             |  9 +++
 arch/x86/entry/entry_32.S                          | 30 ++++------
 arch/x86/entry/entry_64.S                          | 11 ++--
 arch/x86/include/asm/intel-family.h                |  2 +-
 arch/x86/include/asm/microcode_intel.h             | 15 +++++
 arch/x86/include/asm/processor.h                   | 18 ++++++
 arch/x86/include/asm/stacktrace.h                  |  2 +-
 arch/x86/include/asm/switch_to.h                   | 10 +++-
 arch/x86/kernel/cpu/amd.c                          |  9 +--
 arch/x86/kernel/cpu/common.c                       |  2 +-
 arch/x86/kernel/cpu/intel.c                        | 11 +---
 arch/x86/kernel/cpu/microcode/intel.c              | 70 ++++++----------------
 arch/x86/kernel/tsc.c                              |  1 +
 arch/x86/kernel/unwind_frame.c                     | 30 +++++++++-
 arch/x86/mm/mpx.c                                  |  2 +-
 arch/x86/platform/intel-mid/device_libs/Makefile   |  2 +-
 .../{platform_spidev.c => platform_mrfld_spidev.c} |  4 ++
 18 files changed, 129 insertions(+), 100 deletions(-)
 rename arch/x86/platform/intel-mid/device_libs/{platform_spidev.c => platform_mrfld_spidev.c} (91%)

diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index cc3bd583dce1..9e240fcba784 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include "ctype.h"
+#include "string.h"
 
 int memcmp(const void *s1, const void *s2, size_t len)
 {
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 725e820602b1..113588ddb43f 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len);
 #define memset(d,c,l) __builtin_memset(d,c,l)
 #define memcmp	__builtin_memcmp
 
+extern int strcmp(const char *str1, const char *str2);
+extern int strncmp(const char *cs, const char *ct, size_t count);
+extern size_t strlen(const char *s);
+extern char *strstr(const char *s1, const char *s2);
+extern size_t strnlen(const char *s, size_t maxlen);
+extern unsigned int atou(const char *s);
+extern unsigned long long simple_strtoull(const char *cp, char **endp,
+					  unsigned int base);
+
 #endif /* BOOT_STRING_H */
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 701d29f8e4d3..57f7ec35216e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -255,23 +255,6 @@ ENTRY(__switch_to_asm)
 END(__switch_to_asm)
 
 /*
- * The unwinder expects the last frame on the stack to always be at the same
- * offset from the end of the page, which allows it to validate the stack.
- * Calling schedule_tail() directly would break that convention because its an
- * asmlinkage function so its argument has to be pushed on the stack.  This
- * wrapper creates a proper "end of stack" frame header before the call.
- */
-ENTRY(schedule_tail_wrapper)
-	FRAME_BEGIN
-
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
-
-	FRAME_END
-	ret
-ENDPROC(schedule_tail_wrapper)
-/*
  * A newly forked process directly context switches into this address.
  *
  * eax: prev task we switched from
@@ -279,15 +262,24 @@ ENDPROC(schedule_tail_wrapper)
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	call	schedule_tail_wrapper
+	FRAME_BEGIN		/* help unwinder find end of stack */
+
+	/*
+	 * schedule_tail() is asmlinkage so we have to put its 'prev' argument
+	 * on the stack.
+	 */
+	pushl	%eax
+	call	schedule_tail
+	popl	%eax
 
 	testl	%ebx, %ebx
 	jnz	1f		/* kernel threads are uncommon */
 
 2:
 	/* When we fork, we trace the syscall return in the child, too. */
-	movl    %esp, %eax
+	leal	FRAME_OFFSET(%esp), %eax
 	call    syscall_return_slowpath
+	FRAME_END
 	jmp     restore_all
 
 	/* kernel thread */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5b219707c2f2..044d18ebc43c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 #include <linux/err.h>
 
 .code64
@@ -408,17 +409,19 @@ END(__switch_to_asm)
  * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
+	FRAME_BEGIN			/* help unwinder find end of stack */
 	movq	%rax, %rdi
-	call	schedule_tail			/* rdi: 'prev' task parameter */
+	call	schedule_tail		/* rdi: 'prev' task parameter */
 
-	testq	%rbx, %rbx			/* from kernel_thread? */
-	jnz	1f				/* kernel threads are uncommon */
+	testq	%rbx, %rbx		/* from kernel_thread? */
+	jnz	1f			/* kernel threads are uncommon */
 
 2:
-	movq	%rsp, %rdi
+	leaq	FRAME_OFFSET(%rsp),%rdi	/* pt_regs pointer */
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	SWAPGS
+	FRAME_END
 	jmp	restore_regs_and_iret
 
 1:
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 34a46dc076d3..8167fdb67ae8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -57,7 +57,7 @@
 #define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
 #define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
 #define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Anniedale */
 #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
 #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
 
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 195becc6f780..e793fc9a9b20 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -52,6 +52,21 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
+static inline u32 intel_get_microcode_revision(void)
+{
+	u32 rev, dummy;
+
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	native_cpuid_eax(1);
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
+
+	return rev;
+}
+
 #ifdef CONFIG_MICROCODE_INTEL
 extern void __init load_ucode_intel_bsp(void);
 extern void load_ucode_intel_ap(void);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index eaf100508c36..1be64da0384e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -219,6 +219,24 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 	    : "memory");
 }
 
+#define native_cpuid_reg(reg)					\
+static inline unsigned int native_cpuid_##reg(unsigned int op)	\
+{								\
+	unsigned int eax = op, ebx, ecx = 0, edx;		\
+								\
+	native_cpuid(&eax, &ebx, &ecx, &edx);			\
+								\
+	return reg;						\
+}
+
+/*
+ * Native CPUID functions returning a single datum.
+ */
+native_cpuid_reg(eax)
+native_cpuid_reg(ebx)
+native_cpuid_reg(ecx)
+native_cpuid_reg(edx)
+
 static inline void load_cr3(pgd_t *pgdir)
 {
 	write_cr3(__pa(pgdir));
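
For clarity, native_cpuid_reg(eax) expands to exactly the following; the
ebx/ecx/edx variants differ only in which register they return:

    static inline unsigned int native_cpuid_eax(unsigned int op)
    {
        unsigned int eax = op, ebx, ecx = 0, edx;

        native_cpuid(&eax, &ebx, &ecx, &edx);

        return eax;
    }
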
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a3269c897ec5..2e41c50ddf47 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -58,7 +58,7 @@ get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 	if (task == current)
 		return __builtin_frame_address(0);
 
-	return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
+	return &((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
 static inline unsigned long *
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 5cb436acd463..fcc5cd387fd1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -36,7 +36,10 @@ static inline void prepare_switch_to(struct task_struct *prev,
 
 asmlinkage void ret_from_fork(void);
 
-/* data that is pointed to by thread.sp */
+/*
+ * This is the structure pointed to by thread.sp for an inactive task.  The
+ * order of the fields must match the code in __switch_to_asm().
+ */
 struct inactive_task_frame {
 #ifdef CONFIG_X86_64
 	unsigned long r15;
@@ -48,6 +51,11 @@ struct inactive_task_frame {
 	unsigned long di;
 #endif
 	unsigned long bx;
+
+	/*
+	 * These two fields must be together.  They form a stack frame header,
+	 * needed by get_frame_pointer().
+	 */
 	unsigned long bp;
 	unsigned long ret_addr;
 };
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 71cae73a5076..1d3167269a67 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,15 +309,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 
 	/* get information required for multi-node processors */
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		u32 eax, ebx, ecx, edx;
 
-		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-		node_id = ecx & 7;
-
-		/* get compute unit information */
-		smp_num_siblings = ((ebx >> 8) & 3) + 1;
-		c->x86_max_cores /= smp_num_siblings;
-		c->cpu_core_id = ebx & 0xff;
+		node_id = cpuid_ecx(0x8000001e) & 7;
 
 		/*
 		 * We may have multiple LLCs if L3 caches exist, so check if we
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dc1697ca5191..9bab7a8a4293 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1221,7 +1221,7 @@ static __init int setup_disablecpuid(char *arg)
 {
 	int bit;
 
-	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+	if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
 		setup_clear_cpu_cap(bit);
 	else
 		return 0;
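
get_option() happily parses negative numbers, so the old test let something
like clearcpuid=-33 through. A simplified model of why that matters (not the
kernel's actual bitmap helpers):

    #define NCAPINTS 19

    static unsigned int caps[NCAPINTS];

    static void clear_cap(int bit)
    {
        if (bit < NCAPINTS * 32)                        /* bit == -33 passes... */
            caps[bit / 32] &= ~(1u << (bit & 31));      /* ...and scribbles on caps[-1] */
    }

The added bit >= 0 check closes that hole.
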
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcd484d2bb03..203f860d2ab3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -14,6 +14,7 @@
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/intel-family.h>
+#include <asm/microcode_intel.h>
 
 #ifdef CONFIG_X86_64
 #include <linux/topology.h>
@@ -78,14 +79,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
-	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
-		unsigned lower_word;
-
-		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-		/* Required by the SDM */
-		sync_core();
-		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
-	}
+	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
+		c->microcode = intel_get_microcode_revision();
 
 	/*
 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index b624b54912e1..3f329b74e040 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -150,7 +150,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
 {
 	struct ucode_patch *p;
 
-	p = kzalloc(size, GFP_KERNEL);
+	p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL);
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
@@ -368,26 +368,6 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save)
 	return patch;
 }
 
-static void cpuid_1(void)
-{
-	/*
-	 * According to the Intel SDM, Volume 3, 9.11.7:
-	 *
-	 *   CPUID returns a value in a model specific register in
-	 *   addition to its usual register return values. The
-	 *   semantics of CPUID cause it to deposit an update ID value
-	 *   in the 64-bit model-specific register at address 08BH
-	 *   (IA32_BIOS_SIGN_ID). If no update is present in the
-	 *   processor, the value in the MSR remains unmodified.
-	 *
-	 * Use native_cpuid -- this code runs very early and we don't
-	 * want to mess with paravirt.
-	 */
-	unsigned int eax = 1, ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-}
-
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
@@ -410,15 +390,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
 		csig.pf = 1 << ((val[1] >> 18) & 7);
 	}
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
 
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	csig.rev = val[1];
+	csig.rev = intel_get_microcode_revision();
 
 	uci->cpu_sig = csig;
 	uci->valid = 1;
@@ -602,7 +575,7 @@ static inline void print_ucode(struct ucode_cpu_info *uci)
 static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 {
 	struct microcode_intel *mc;
-	unsigned int val[2];
+	u32 rev;
 
 	mc = uci->mc;
 	if (!mc)
@@ -610,21 +583,16 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 
 	/* write microcode via MSR 0x79 */
 	native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-	if (val[1] != mc->hdr.rev)
+	rev = intel_get_microcode_revision();
+	if (rev != mc->hdr.rev)
 		return -1;
 
 #ifdef CONFIG_X86_64
 	/* Flush global tlb. This is precaution. */
 	flush_tlb_early();
 #endif
-	uci->cpu_sig.rev = val[1];
+	uci->cpu_sig.rev = rev;
 
 	if (early)
 		print_ucode(uci);
@@ -804,8 +772,8 @@ static int apply_microcode_intel(int cpu)
 	struct microcode_intel *mc;
 	struct ucode_cpu_info *uci;
 	struct cpuinfo_x86 *c;
-	unsigned int val[2];
 	static int prev_rev;
+	u32 rev;
 
 	/* We should bind the task to the CPU */
 	if (WARN_ON(raw_smp_processor_id() != cpu))
@@ -822,33 +790,28 @@ static int apply_microcode_intel(int cpu)
 
 	/* write microcode via MSR 0x79 */
 	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
-	wrmsrl(MSR_IA32_UCODE_REV, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
 
-	/* get the current revision from MSR 0x8B */
-	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+	rev = intel_get_microcode_revision();
 
-	if (val[1] != mc->hdr.rev) {
+	if (rev != mc->hdr.rev) {
 		pr_err("CPU%d update to revision 0x%x failed\n",
 		       cpu, mc->hdr.rev);
 		return -1;
 	}
 
-	if (val[1] != prev_rev) {
+	if (rev != prev_rev) {
 		pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
-			val[1],
+			rev,
 			mc->hdr.date & 0xffff,
 			mc->hdr.date >> 24,
 			(mc->hdr.date >> 16) & 0xff);
-		prev_rev = val[1];
+		prev_rev = rev;
 	}
 
 	c = &cpu_data(cpu);
 
-	uci->cpu_sig.rev = val[1];
-	c->microcode = val[1];
+	uci->cpu_sig.rev = rev;
+	c->microcode = rev;
 
 	return 0;
 }
@@ -860,7 +823,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover = size;
-	unsigned int curr_mc_size = 0;
+	unsigned int curr_mc_size = 0, new_mc_size = 0;
 	unsigned int csig, cpf;
 
 	while (leftover) {
@@ -901,6 +864,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc  = mc;
+			new_mc_size = mc_size;
 			mc = NULL;	/* trigger new vmalloc */
 		}
 
@@ -926,7 +890,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	 * permanent memory. So it will be loaded early when a CPU is hot added
 	 * or resumes.
 	 */
-	save_mc_for_early(new_mc, curr_mc_size);
+	save_mc_for_early(new_mc, new_mc_size);
 
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
 		 cpu, new_rev, uci->cpu_sig.rev);
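
Two hunks above fix sizing bugs of the same flavor: __alloc_microcode_buf()
sized the struct ucode_patch descriptor by `size`, the length of the blob it
merely points at, and generic_load_microcode() saved the winning patch with
curr_mc_size -- the size of the *last* candidate scanned -- instead of the
winner's own size. The allocation half, reduced (struct fields abridged):

    struct ucode_patch {
        struct list_head plist;
        void *data;             /* the blob is a separate allocation */
        u32 patch_id;
    };

    /* before: descriptor sized by the blob it merely points at */
    p = kzalloc(size, GFP_KERNEL);

    /* after: the descriptor has a fixed, known size */
    p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL);
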
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index be3a49ee0356..e41af597aed8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -694,6 +694,7 @@ unsigned long native_calibrate_tsc(void)
 			crystal_khz = 24000;	/* 24.0 MHz */
 			break;
 		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_ATOM_DENVERTON:
 			crystal_khz = 25000;	/* 25.0 MHz */
 			break;
 		case INTEL_FAM6_ATOM_GOLDMONT:
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 4443e499f279..23d15565d02a 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -6,6 +6,21 @@
 
 #define FRAME_HEADER_SIZE (sizeof(long) * 2)
 
+/*
+ * This disables KASAN checking when reading a value from another task's stack,
+ * since the other task could be running on another CPU and could have poisoned
+ * the stack in the meantime.
+ */
+#define READ_ONCE_TASK_STACK(task, x)			\
+({							\
+	unsigned long val;				\
+	if (task == current)				\
+		val = READ_ONCE(x);			\
+	else						\
+		val = READ_ONCE_NOCHECK(x);		\
+	val;						\
+})
+
 static void unwind_dump(struct unwind_state *state, unsigned long *sp)
 {
 	static bool dumped_before = false;
@@ -48,7 +63,8 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	if (state->regs && user_mode(state->regs))
 		return 0;
 
-	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+	addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
+	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr,
 				     addr_p);
 
 	return __kernel_text_address(addr) ? addr : 0;
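
READ_ONCE_TASK_STACK() picks the accessor per task: KASAN-checked reads for
current, whose stack cannot be repoisoned mid-walk, and unchecked reads for
everyone else. The same decision written out as a plain helper (a sketch,
kernel context assumed):

    static unsigned long peek_stack_word(struct task_struct *task,
                                         unsigned long *addr)
    {
        /*
         * current cannot poison its own stack while walking it, so keep
         * KASAN checking; any other task may be live on another CPU,
         * repoisoning frames as we read them.
         */
        if (task == current)
            return READ_ONCE(*addr);

        return READ_ONCE_NOCHECK(*addr);
    }
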
@@ -162,7 +178,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->regs)
 		next_bp = (unsigned long *)state->regs->bp;
 	else
-		next_bp = (unsigned long *)*state->bp;
+		next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp);
 
 	/* is the next frame pointer an encoded pointer to pt_regs? */
 	regs = decode_frame_pointer(next_bp);
@@ -207,6 +223,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	return true;
 
 bad_address:
+	/*
+	 * When unwinding a non-current task, the task might actually be
+	 * running on another CPU, in which case it could be modifying its
+	 * stack while we're reading it.  This is generally not a problem and
+	 * can be ignored as long as the caller understands that unwinding
+	 * another task will not always succeed.
+	 */
+	if (state->task != current)
+		goto the_end;
+
 	if (state->regs) {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 324e5713d386..af59f808742f 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
 	 * We were not able to extract an address from the instruction,
 	 * probably because there was something invalid in it.
 	 */
-	if (info->si_addr == (void *)-1) {
+	if (info->si_addr == (void __user *)-1) {
 		err = -EINVAL;
 		goto err_out;
 	}
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index 61b5ed2b7d40..90e4f2a6625b 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -15,7 +15,7 @@ obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o
 obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o
 obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o
 # SPI Devices
-obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o
+obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o
 # I2C Devices
 obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
 obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
similarity index 91%
rename from arch/x86/platform/intel-mid/device_libs/platform_spidev.c
rename to arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
index 30c601b399ee..27186ad654c9 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
@@ -11,6 +11,7 @@
  * of the License.
  */
 
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/sfi.h>
 #include <linux/spi/pxa2xx_spi.h>
@@ -34,6 +35,9 @@ static void __init *spidev_platform_data(void *info)
 {
 	struct spi_board_info *spi_info = info;
 
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return ERR_PTR(-ENODEV);
+
 	spi_info->mode = SPI_MODE_0;
 	spi_info->controller_data = &spidev_spi_chip;
 

* [GIT PULL] x86 fixes
@ 2016-12-23 22:57 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-12-23 22:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: c280f7736ab26a601932b1ce017a3840dbedcfdc Revert "x86/unwind: Detect bad stack return address"

There are a number of fixes:

 - a round of fixes for CPUID-less legacy CPUs
 - a number of microcode loader fixes
 - i8042 detection robustness fixes
 - stack dump/unwinder fixes
 - x86 SoC platform driver fixes
 - a GCC 7 warning fix
 - virtualization-related fixes

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/input/serio/i8042-x86ia64io.h# d79e141c1c6e: Input: i8042 - Trust firmwar

 Thanks,

	Ingo

------------------>
Andy Lutomirski (5):
      x86/cpu: Probe CPUID leaf 6 even when cpuid_level == 6
      x86/asm/32: Make sync_core() handle missing CPUID on all 32-bit kernels
      Revert "x86/boot: Fail the boot if !M486 and CPUID is missing"
      x86/microcode/intel: Replace sync_core() with native_cpuid()
      x86/asm: Rewrite sync_core() to use IRET-to-self

Boris Ostrovsky (1):
      x86/microcode/AMD: Use native_cpuid() in load_ucode_amd_bsp()

Borislav Petkov (6):
      x86/microcode/AMD: Make find_proper_container() sane again
      x86/microcode/AMD: Sanitize apply_microcode_early_amd()
      x86/microcode/AMD: Do not load when running on a hypervisor
      x86/topology: Document cpu_llc_id
      x86/alternatives: Do not use sync_core() to serialize I$
      x86/microcode/AMD: Reload proper initrd start address

Dmitry Torokhov (4):
      x86/init: Add i8042 state to the platform data
      Input: i8042 - Trust firmware a bit more when probing on X86
      x86/init: Remove i8042_detect() from platform ops
      x86/init: Fix a couple of comment typos

Josh Poimboeuf (3):
      x86/unwind: Adjust last frame check for aligned function stacks
      x86/unwind: Dump stack data on warnings
      Revert "x86/unwind: Detect bad stack return address"

Linus Walleij (1):
      x86/platform/intel-mid: Switch MPU3050 driver to IIO

Markus Trippelsdorf (1):
      x86/tools: Fix gcc-7 warning in relocs.c

Nicolas Iooss (1):
      x86/platform/intel/quark: Add printf attribute to imr_self_test_result()

Peter Zijlstra (1):
      x86/paravirt: Mark unused patch_default label

Vitaly Kuznetsov (1):
      x86/hyperv: Handle unknown NMIs on one CPU when unknown_nmi_panic


 Documentation/x86/topology.txt                   |  9 +++
 arch/x86/boot/cpu.c                              |  6 --
 arch/x86/include/asm/processor.h                 | 80 +++++++++++++++++-------
 arch/x86/include/asm/unwind.h                    |  2 +-
 arch/x86/include/asm/x86_init.h                  | 26 ++++++--
 arch/x86/kernel/acpi/boot.c                      |  7 +++
 arch/x86/kernel/alternative.c                    | 15 +++--
 arch/x86/kernel/cpu/common.c                     |  7 ++-
 arch/x86/kernel/cpu/microcode/amd.c              | 56 ++++++++++-------
 arch/x86/kernel/cpu/microcode/core.c             | 40 +++++++-----
 arch/x86/kernel/cpu/microcode/intel.c            | 26 +++++++-
 arch/x86/kernel/cpu/mshyperv.c                   | 24 +++++++
 arch/x86/kernel/paravirt_patch_32.c              |  2 +-
 arch/x86/kernel/paravirt_patch_64.c              |  2 +-
 arch/x86/kernel/platform-quirks.c                |  5 ++
 arch/x86/kernel/unwind_frame.c                   | 56 ++++++++++++++---
 arch/x86/kernel/x86_init.c                       |  2 -
 arch/x86/platform/ce4100/ce4100.c                |  6 --
 arch/x86/platform/intel-mid/device_libs/Makefile |  2 +-
 arch/x86/platform/intel-mid/intel-mid.c          |  7 ---
 arch/x86/platform/intel-quark/imr_selftest.c     |  3 +-
 arch/x86/tools/relocs.c                          |  3 +-
 drivers/input/serio/i8042-x86ia64io.h            | 10 ++-
 23 files changed, 279 insertions(+), 117 deletions(-)

diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt
index 06afac252f5b..f3e9d7e9ed6c 100644
--- a/Documentation/x86/topology.txt
+++ b/Documentation/x86/topology.txt
@@ -63,6 +63,15 @@ historical nature and should be cleaned up.
     The maximum possible number of packages in the system. Helpful for per
     package facilities to preallocate per package information.
 
+  - cpu_llc_id:
+
+    A per-CPU variable containing:
+    - On Intel, the first APIC ID of the list of CPUs sharing the Last Level
+    Cache
+
+    - On AMD, the Node ID or Core Complex ID containing the Last Level
+    Cache. In general, it is a number identifying an LLC uniquely on the
+    system.
 
 * Cores:
 
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 4224ede43b4e..26240dde081e 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -87,12 +87,6 @@ int validate_cpu(void)
 		return -1;
 	}
 
-	if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) &&
-	    !has_eflag(X86_EFLAGS_ID)) {
-		printf("This kernel requires a CPU with the CPUID instruction.  Build with CONFIG_M486=y to run on this CPU.\n");
-		return -1;
-	}
-
 	if (err_flags) {
 		puts("This kernel requires the following features "
 		     "not present on the CPU:\n");
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6aa741fbe1df..eaf100508c36 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -602,33 +602,69 @@ static __always_inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution and prefetching of modified code. */
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ *  a) Text was modified using one virtual address and is about to be executed
+ *     from the same physical page at a different virtual address.
+ *
+ *  b) Text was modified on a different CPU, may subsequently be
+ *     executed on this CPU, and you want to make sure the new version
+ *     gets executed.  This generally means you're calling this in an IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ */
 static inline void sync_core(void)
 {
-	int tmp;
-
-#ifdef CONFIG_M486
 	/*
-	 * Do a CPUID if available, otherwise do a jump.  The jump
-	 * can conveniently enough be the jump around CPUID.
+	 * There are quite a few ways to do this.  IRET-to-self is nice
+	 * because it works on every CPU, at any CPL (so it's compatible
+	 * with paravirtualization), and it never exits to a hypervisor.
+	 * The only down sides are that it's a bit slow (it seems to be
+	 * a bit more than 2x slower than the fastest options) and that
+	 * it unmasks NMIs.  The "push %cs" is needed because, in
+	 * paravirtual environments, __KERNEL_CS may not be a valid CS
+	 * value when we do IRET directly.
+	 *
+	 * In case NMI unmasking or performance ever becomes a problem,
+	 * the next best option appears to be MOV-to-CR2 and an
+	 * unconditional jump.  That sequence also works on all CPUs,
+	 * but it will fault at CPL3 (i.e. Xen PV and lguest).
+	 *
+	 * CPUID is the conventional way, but it's nasty: it doesn't
+	 * exist on some 486-like CPUs, and it usually exits to a
+	 * hypervisor.
+	 *
+	 * Like all of Linux's memory ordering operations, this is a
+	 * compiler barrier as well.
 	 */
-	asm volatile("cmpl %2,%1\n\t"
-		     "jl 1f\n\t"
-		     "cpuid\n"
-		     "1:"
-		     : "=a" (tmp)
-		     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_32
+	asm volatile (
+		"pushfl\n\t"
+		"pushl %%cs\n\t"
+		"pushl $1f\n\t"
+		"iret\n\t"
+		"1:"
+		: "+r" (__sp) : : "memory");
 #else
-	/*
-	 * CPUID is a barrier to speculative execution.
-	 * Prefetched instructions are automatically
-	 * invalidated when modified.
-	 */
-	asm volatile("cpuid"
-		     : "=a" (tmp)
-		     : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	unsigned int tmp;
+
+	asm volatile (
+		"mov %%ss, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq %%rsp\n\t"
+		"addq $8, (%%rsp)\n\t"
+		"pushfq\n\t"
+		"mov %%cs, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq $1f\n\t"
+		"iretq\n\t"
+		"1:"
+		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
 #endif
 }
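
For reference, the frame the 64-bit sequence above hands to iretq, in push
order (derived from the asm; the addq is the subtle bit):

    /*
     * pushq of the %ss copy -> SS
     * pushq %rsp            -> RSP as it was *after* the SS push;
     *                          addq $8, (%rsp) adjusts the saved value
     *                          back to the pre-sequence RSP that iretq
     *                          should restore
     * pushfq                -> RFLAGS
     * pushq of the %cs copy -> CS
     * pushq $1f             -> RIP, so iretq "returns" to the next insn
     *
     * iretq pops RIP, CS, RFLAGS, RSP, SS -- and fully serializes the
     * instruction stream as a side effect.
     */
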
 
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index c5a7f3a930dd..6fa75b17aec3 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,7 +12,7 @@ struct unwind_state {
 	struct task_struct *task;
 	int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
-	unsigned long *bp;
+	unsigned long *bp, *orig_sp;
 	struct pt_regs *regs;
 #else
 	unsigned long *sp;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 6ba793178441..7ba7e90a9ad6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -59,7 +59,7 @@ struct x86_init_irqs {
 
 /**
  * struct x86_init_oem - oem platform specific customizing functions
- * @arch_setup:			platform specific architecure setup
+ * @arch_setup:			platform specific architecture setup
  * @banner:			print a platform specific banner
  */
 struct x86_init_oem {
@@ -165,8 +165,25 @@ struct x86_legacy_devices {
 };
 
 /**
+ * enum x86_legacy_i8042_state - i8042 keyboard controller state
+ * @X86_LEGACY_I8042_PLATFORM_ABSENT: the controller is always absent on
+ *	given platform/subarch.
+ * @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller
+ *	is absent.
+ * @X86_LEGACY_I8042_EXPECTED_PRESENT: the controller is likely to be
+ *	present; the i8042 driver should probe for controller existence.
+ */
+enum x86_legacy_i8042_state {
+	X86_LEGACY_I8042_PLATFORM_ABSENT,
+	X86_LEGACY_I8042_FIRMWARE_ABSENT,
+	X86_LEGACY_I8042_EXPECTED_PRESENT,
+};
+
+/**
  * struct x86_legacy_features - legacy x86 features
  *
+ * @i8042: indicates whether we expect the platform to have an i8042 controller
+ *	present.
  * @rtc: this device has a CMOS real-time clock present
  * @reserve_bios_regions: boot code will search for the EBDA address and the
  * 	start of the 640k - 1M BIOS region.  If false, the platform must
@@ -175,6 +192,7 @@ struct x86_legacy_devices {
  * 	documentation for further details.
  */
 struct x86_legacy_features {
+	enum x86_legacy_i8042_state i8042;
 	int rtc;
 	int reserve_bios_regions;
 	struct x86_legacy_devices devices;
@@ -188,15 +206,14 @@ struct x86_legacy_features {
  * @set_wallclock:		set time back to HW clock
  * @is_untracked_pat_range	exclude from PAT logic
  * @nmi_init			enable NMI on cpus
- * @i8042_detect		pre-detect if i8042 controller exists
  * @save_sched_clock_state:	save state for sched_clock() on suspend
  * @restore_sched_clock_state:	restore state for sched_clock() on resume
- * @apic_post_init:		adjust apic if neeeded
+ * @apic_post_init:		adjust apic if needed
  * @legacy:			legacy features
  * @set_legacy_features:	override legacy features. Use of this callback
  * 				is highly discouraged. You should only need
  * 				this if your hardware platform requires further
- * 				custom fine tuning far beyong what may be
+ * 				custom fine tuning far beyond what may be
  * 				possible in x86_early_init_platform_quirks() by
  * 				only using the current x86_hardware_subarch
  * 				semantics.
@@ -210,7 +227,6 @@ struct x86_platform_ops {
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
 	unsigned char (*get_nmi_reason)(void);
-	int (*i8042_detect)(void);
 	void (*save_sched_clock_state)(void);
 	void (*restore_sched_clock_state)(void);
 	void (*apic_post_init)(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6f65b0eed384..64422f850e95 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -930,6 +930,13 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 		x86_platform.legacy.devices.pnpbios = 0;
 	}
 
+	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+	    !(acpi_gbl_FADT.boot_flags & ACPI_FADT_8042) &&
+	    x86_platform.legacy.i8042 != X86_LEGACY_I8042_PLATFORM_ABSENT) {
+		pr_debug("ACPI: i8042 controller is absent\n");
+		x86_platform.legacy.i8042 = X86_LEGACY_I8042_FIRMWARE_ABSENT;
+	}
+
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
 		pr_debug("ACPI: not registering RTC platform device\n");
 		x86_platform.legacy.rtc = 0;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5cb272a7a5a3..c5b8f760473c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -337,7 +337,11 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
 		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
 }
 
-static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+/*
+ * "noinline" to cause control flow change and thus invalidate I$ and
+ * cause refetch after modification.
+ */
+static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
 	unsigned long flags;
 
@@ -346,7 +350,6 @@ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
 
 	local_irq_save(flags);
 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
-	sync_core();
 	local_irq_restore(flags);
 
 	DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
@@ -359,9 +362,12 @@ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
  * This implies that asymmetric systems where APs have less capabilities than
  * the boot processor are not handled. Tough. Make sure you disable such
  * features by hand.
+ *
+ * Marked "noinline" to cause control flow change and thus insn cache
+ * to refetch changed I$ lines.
  */
-void __init_or_module apply_alternatives(struct alt_instr *start,
-					 struct alt_instr *end)
+void __init_or_module noinline apply_alternatives(struct alt_instr *start,
+						  struct alt_instr *end)
 {
 	struct alt_instr *a;
 	u8 *instr, *replacement;
@@ -667,7 +673,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
 	unsigned long flags;
 	local_irq_save(flags);
 	memcpy(addr, opcode, len);
-	sync_core();
 	local_irq_restore(flags);
 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
 	   that causes hangs on some VIA CPUs. */
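
Both noinline annotations lean on the same property: on x86, a control-flow
change -- such as the call/return to a non-inlined function -- is enough for
the *local* CPU to refetch instructions it has just modified. A sketch of
the idea (hypothetical helper, not the kernel API):

    static noinline void patch_site(u8 *addr, const u8 *insn, size_t len)
    {
        memcpy(addr, insn, len);
        /*
         * Returning from this noinline function is itself the
         * control-flow change; no CPUID/IRET-style serialization is
         * needed for same-CPU early patching.
         */
    }
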
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1f6b50a449ab..dc1697ca5191 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -667,13 +667,14 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 		c->x86_capability[CPUID_1_EDX] = edx;
 	}
 
+	/* Thermal and Power Management Leaf: level 0x00000006 (eax) */
+	if (c->cpuid_level >= 0x00000006)
+		c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
+
 	/* Additional Intel-defined flags: level 0x00000007 */
 	if (c->cpuid_level >= 0x00000007) {
 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
-
 		c->x86_capability[CPUID_7_0_EBX] = ebx;
-
-		c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
 		c->x86_capability[CPUID_7_ECX] = ecx;
 	}
 
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 6f353bdb3a25..6a31e2691f3a 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -116,10 +116,11 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
 
 /*
  * This scans the ucode blob for the proper container as we can have multiple
- * containers glued together.
+ * containers glued together. Returns the equivalence ID from the equivalence
+ * table or 0 if none found.
  */
-static struct container
-find_proper_container(u8 *ucode, size_t size, u16 *ret_id)
+static u16
+find_proper_container(u8 *ucode, size_t size, struct container *ret_cont)
 {
 	struct container ret = { NULL, 0 };
 	u32 eax, ebx, ecx, edx;
@@ -138,7 +139,7 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id)
 	if (header[0] != UCODE_MAGIC ||
 	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
 	    header[2] == 0)                            /* size */
-		return ret;
+		return eq_id;
 
 	eax = 0x00000001;
 	ecx = 0;
@@ -163,8 +164,9 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id)
 			 * ucode update loop below
 			 */
 			left = ret.size - offset;
-			*ret_id = eq_id;
-			return ret;
+
+			*ret_cont = ret;
+			return eq_id;
 		}
 
 		/*
@@ -189,7 +191,7 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id)
 		ucode     = data;
 	}
 
-	return ret;
+	return eq_id;
 }
 
 static int __apply_microcode_amd(struct microcode_amd *mc_amd)
@@ -214,17 +216,18 @@ static int __apply_microcode_amd(struct microcode_amd *mc_amd)
  * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
  * load_microcode_amd() to save equivalent cpu table and microcode patches in
  * kernel heap memory.
+ *
+ * Returns true if container found (sets @ret_cont), false otherwise.
  */
-static struct container
-apply_microcode_early_amd(void *ucode, size_t size, bool save_patch)
+static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch,
+				      struct container *ret_cont)
 {
-	struct container ret = { NULL, 0 };
 	u8 (*patch)[PATCH_MAX_SIZE];
+	u32 rev, *header, *new_rev;
+	struct container ret;
 	int offset, left;
-	u32 rev, *header;
-	u8  *data;
 	u16 eq_id = 0;
-	u32 *new_rev;
+	u8  *data;
 
 #ifdef CONFIG_X86_32
 	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
@@ -235,11 +238,11 @@ apply_microcode_early_amd(void *ucode, size_t size, bool save_patch)
 #endif
 
 	if (check_current_patch_level(&rev, true))
-		return (struct container){ NULL, 0 };
+		return false;
 
-	ret = find_proper_container(ucode, size, &eq_id);
+	eq_id = find_proper_container(ucode, size, &ret);
 	if (!eq_id)
-		return (struct container){ NULL, 0 };
+		return false;
 
 	this_equiv_id = eq_id;
 	header = (u32 *)ret.data;
@@ -273,7 +276,11 @@ apply_microcode_early_amd(void *ucode, size_t size, bool save_patch)
 		data   += offset;
 		left   -= offset;
 	}
-	return ret;
+
+	if (ret_cont)
+		*ret_cont = ret;
+
+	return true;
 }
 
 static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
@@ -294,6 +301,7 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
 void __init load_ucode_amd_bsp(unsigned int family)
 {
 	struct ucode_cpu_info *uci;
+	u32 eax, ebx, ecx, edx;
 	struct cpio_data cp;
 	const char *path;
 	bool use_pa;
@@ -315,9 +323,12 @@ void __init load_ucode_amd_bsp(unsigned int family)
 		return;
 
 	/* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */
-	uci->cpu_sig.sig = cpuid_eax(1);
+	eax = 1;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	uci->cpu_sig.sig = eax;
 
-	apply_microcode_early_amd(cp.data, cp.size, true);
+	apply_microcode_early_amd(cp.data, cp.size, true, NULL);
 }
 
 #ifdef CONFIG_X86_32
@@ -349,7 +360,7 @@ void load_ucode_amd_ap(unsigned int family)
 	 * This would set amd_ucode_patch above so that the following APs can
 	 * use it directly instead of going down this path again.
 	 */
-	apply_microcode_early_amd(cp.data, cp.size, true);
+	apply_microcode_early_amd(cp.data, cp.size, true, NULL);
 }
 #else
 void load_ucode_amd_ap(unsigned int family)
@@ -387,8 +398,7 @@ void load_ucode_amd_ap(unsigned int family)
 			}
 		}
 
-		cont = apply_microcode_early_amd(cp.data, cp.size, false);
-		if (!(cont.data && cont.size)) {
+		if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) {
 			cont.size = -1;
 			return;
 		}
@@ -443,7 +453,7 @@ int __init save_microcode_in_initrd_amd(unsigned int fam)
 				return -EINVAL;
 			}
 
-			cont = find_proper_container(cp.data, cp.size, &eq_id);
+			eq_id = find_proper_container(cp.data, cp.size, &cont);
 			if (!eq_id) {
 				cont.size = -1;
 				return -EINVAL;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6996413c78c3..2af69d27da62 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -44,7 +44,7 @@
 #define DRIVER_VERSION	"2.2"
 
 static struct microcode_ops	*microcode_ops;
-static bool dis_ucode_ldr;
+static bool dis_ucode_ldr = true;
 
 LIST_HEAD(microcode_cache);
 
@@ -76,6 +76,7 @@ struct cpu_info_ctx {
 static bool __init check_loader_disabled_bsp(void)
 {
 	static const char *__dis_opt_str = "dis_ucode_ldr";
+	u32 a, b, c, d;
 
 #ifdef CONFIG_X86_32
 	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
@@ -88,8 +89,23 @@ static bool __init check_loader_disabled_bsp(void)
 	bool *res = &dis_ucode_ldr;
 #endif
 
-	if (cmdline_find_option_bool(cmdline, option))
-		*res = true;
+	if (!have_cpuid_p())
+		return *res;
+
+	a = 1;
+	c = 0;
+	native_cpuid(&a, &b, &c, &d);
+
+	/*
+	 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
+	 * completely accurate as xen pv guests don't see that CPUID bit set but
+	 * that's good enough as they don't land on the BSP path anyway.
+	 */
+	if (c & BIT(31))
+		return *res;
+
+	if (cmdline_find_option_bool(cmdline, option) <= 0)
+		*res = false;
 
 	return *res;
 }
@@ -121,9 +137,6 @@ void __init load_ucode_bsp(void)
 	if (check_loader_disabled_bsp())
 		return;
 
-	if (!have_cpuid_p())
-		return;
-
 	vendor = x86_cpuid_vendor();
 	family = x86_cpuid_family();
 
@@ -157,9 +170,6 @@ void load_ucode_ap(void)
 	if (check_loader_disabled_ap())
 		return;
 
-	if (!have_cpuid_p())
-		return;
-
 	vendor = x86_cpuid_vendor();
 	family = x86_cpuid_family();
 
@@ -233,14 +243,12 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
 # endif
 
 	/*
-	 * Did we relocate the ramdisk?
-	 *
-	 * So we possibly relocate the ramdisk *after* applying microcode on the
-	 * BSP so we rely on use_pa (use physical addresses) - even if it is not
-	 * absolutely correct - to determine whether we've done the ramdisk
-	 * relocation already.
+	 * Fixup the start address: after reserve_initrd() runs, initrd_start
+	 * has the virtual address of the beginning of the initrd. It also
+	 * possibly relocates the ramdisk. In either case, initrd_start contains
+	 * the updated address so use that instead.
 	 */
-	if (!use_pa && relocated_ramdisk)
+	if (!use_pa && initrd_start)
 		start = initrd_start;
 
 	return find_cpio_data(path, (void *)start, size, NULL);
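
The BSP-side check keys off CPUID(1).ECX bit 31, which real hardware keeps
reserved-as-zero and hypervisors set. The same detection as a standalone
user-space sketch (using GCC's <cpuid.h>):

    #include <cpuid.h>
    #include <stdbool.h>

    static bool running_on_hypervisor(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return false;   /* no CPUID at all */

        return ecx & (1u << 31);    /* the "hypervisor present" bit */
    }
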
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 54d50c3694d8..b624b54912e1 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -368,6 +368,26 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save)
 	return patch;
 }
 
+static void cpuid_1(void)
+{
+	/*
+	 * According to the Intel SDM, Volume 3, 9.11.7:
+	 *
+	 *   CPUID returns a value in a model specific register in
+	 *   addition to its usual register return values. The
+	 *   semantics of CPUID cause it to deposit an update ID value
+	 *   in the 64-bit model-specific register at address 08BH
+	 *   (IA32_BIOS_SIGN_ID). If no update is present in the
+	 *   processor, the value in the MSR remains unmodified.
+	 *
+	 * Use native_cpuid -- this code runs very early and we don't
+	 * want to mess with paravirt.
+	 */
+	unsigned int eax = 1, ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+}
+
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
@@ -393,7 +413,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
+	cpuid_1();
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
@@ -593,7 +613,7 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
+	cpuid_1();
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
@@ -805,7 +825,7 @@ static int apply_microcode_intel(int cpu)
 	wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
+	cpuid_1();
 
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 6c044543545e..f37e02e41a77 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -30,6 +30,7 @@
 #include <asm/apic.h>
 #include <asm/timer.h>
 #include <asm/reboot.h>
+#include <asm/nmi.h>
 
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -157,6 +158,26 @@ static unsigned char hv_get_nmi_reason(void)
 	return 0;
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
+ * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle
+ * unknown NMI on the first CPU which gets it.
+ */
+static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
+{
+	static atomic_t nmi_cpu = ATOMIC_INIT(-1);
+
+	if (!unknown_nmi_panic)
+		return NMI_DONE;
+
+	if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
+		return NMI_HANDLED;
+
+	return NMI_DONE;
+}
+#endif
+
 static void __init ms_hyperv_init_platform(void)
 {
 	/*
@@ -182,6 +203,9 @@ static void __init ms_hyperv_init_platform(void)
 		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
 			lapic_timer_frequency);
 	}
+
+	register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
+			     "hv_nmi_unknown");
 #endif
 
 	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
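
hv_nmi_unknown() elects exactly one owner for the unknown NMI: the first CPU
to move nmi_cpu from -1 to its own ID returns NMI_DONE, so the
unknown_nmi_panic path runs there, once, while every later CPU sees the slot
already taken and returns NMI_HANDLED. The election pattern in isolation
(names hypothetical):

    static atomic_t nmi_owner = ATOMIC_INIT(-1);

    /* true only on the single CPU that wins the -1 -> cpu transition */
    static bool first_unknown_nmi(int cpu)
    {
        return atomic_cmpxchg(&nmi_owner, -1, cpu) == -1;
    }
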
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d33ef165b1f8..553acbbb4d32 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -68,7 +68,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 #endif
 
 	default:
-patch_default:
+patch_default: __maybe_unused
 		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
 		break;
 
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index f4fcf26c9fce..11aaf1eaa0e4 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -80,7 +80,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 #endif
 
 	default:
-patch_default:
+patch_default: __maybe_unused
 		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
 		break;
 
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index 24a50301f150..91271122f0df 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -6,6 +6,7 @@
 
 void __init x86_early_init_platform_quirks(void)
 {
+	x86_platform.legacy.i8042 = X86_LEGACY_I8042_EXPECTED_PRESENT;
 	x86_platform.legacy.rtc = 1;
 	x86_platform.legacy.reserve_bios_regions = 0;
 	x86_platform.legacy.devices.pnpbios = 1;
@@ -16,10 +17,14 @@ void __init x86_early_init_platform_quirks(void)
 		break;
 	case X86_SUBARCH_XEN:
 	case X86_SUBARCH_LGUEST:
+		x86_platform.legacy.devices.pnpbios = 0;
+		x86_platform.legacy.rtc = 0;
+		break;
 	case X86_SUBARCH_INTEL_MID:
 	case X86_SUBARCH_CE4100:
 		x86_platform.legacy.devices.pnpbios = 0;
 		x86_platform.legacy.rtc = 0;
+		x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
 		break;
 	}
 
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index ea7b7f9a3b9e..4443e499f279 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -6,6 +6,37 @@
 
 #define FRAME_HEADER_SIZE (sizeof(long) * 2)
 
+static void unwind_dump(struct unwind_state *state, unsigned long *sp)
+{
+	static bool dumped_before = false;
+	bool prev_zero, zero = false;
+	unsigned long word;
+
+	if (dumped_before)
+		return;
+
+	dumped_before = true;
+
+	printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n",
+			state->stack_info.type, state->stack_info.next_sp,
+			state->stack_mask, state->graph_idx);
+
+	for (sp = state->orig_sp; sp < state->stack_info.end; sp++) {
+		word = READ_ONCE_NOCHECK(*sp);
+
+		prev_zero = zero;
+		zero = word == 0;
+
+		if (zero) {
+			if (!prev_zero)
+				printk_deferred("%p: %016x ...\n", sp, 0);
+			continue;
+		}
+
+		printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word);
+	}
+}
+
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
 	unsigned long addr;
@@ -20,15 +51,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
 				     addr_p);
 
-	if (!__kernel_text_address(addr)) {
-		printk_deferred_once(KERN_WARNING
-			"WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
-			(void *)addr, addr_p, state->task->comm,
-			state->task->pid);
-		return 0;
-	}
-
-	return addr;
+	return __kernel_text_address(addr) ? addr : 0;
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
@@ -46,7 +69,14 @@ static bool is_last_task_frame(struct unwind_state *state)
 	unsigned long bp = (unsigned long)state->bp;
 	unsigned long regs = (unsigned long)task_pt_regs(state->task);
 
-	return bp == regs - FRAME_HEADER_SIZE;
+	/*
+	 * We have to check for the last task frame at two different locations
+	 * because gcc can occasionally decide to realign the stack pointer and
+	 * change the offset of the stack frame by a word in the prologue of a
+	 * function called by head/entry code.
+	 */
+	return bp == regs - FRAME_HEADER_SIZE ||
+	       bp == regs - FRAME_HEADER_SIZE - sizeof(long);
 }
 
 /*
@@ -67,6 +97,7 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 			       size_t len)
 {
 	struct stack_info *info = &state->stack_info;
+	enum stack_type orig_type = info->type;
 
 	/*
 	 * If addr isn't on the current stack, switch to the next one.
@@ -80,6 +111,9 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 				   &state->stack_mask))
 			return false;
 
+	if (!state->orig_sp || info->type != orig_type)
+		state->orig_sp = addr;
+
 	return true;
 }
 
@@ -178,11 +212,13 @@ bool unwind_next_frame(struct unwind_state *state)
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
 			state->regs, state->task->comm,
 			state->task->pid, next_frame);
+		unwind_dump(state, (unsigned long *)state->regs);
 	} else {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
 			state->bp, state->task->comm,
 			state->task->pid, next_frame);
+		unwind_dump(state, state->bp);
 	}
 the_end:
 	state->stack_info.type = STACK_TYPE_UNKNOWN;
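
unwind_dump() compresses runs of zero stack words into a single "..." line
so one bad-frame warning doesn't flood the log. The run compression by
itself, as a user-space sketch:

    #include <stdbool.h>
    #include <stdio.h>

    static void dump_words(const unsigned long *sp, const unsigned long *end)
    {
        bool in_zero_run = false;

        for (; sp < end; sp++) {
            if (*sp == 0) {
                if (!in_zero_run)       /* print only the run's first word */
                    printf("%p: 0 ...\n", (const void *)sp);
                in_zero_run = true;
                continue;
            }
            in_zero_run = false;
            printf("%p: %016lx\n", (const void *)sp, *sp);
        }
    }
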
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 0bd9f1287f39..11a93f005268 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -89,7 +89,6 @@ struct x86_cpuinit_ops x86_cpuinit = {
 };
 
 static void default_nmi_init(void) { };
-static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform __ro_after_init = {
 	.calibrate_cpu			= native_calibrate_cpu,
@@ -100,7 +99,6 @@ struct x86_platform_ops x86_platform __ro_after_init = {
 	.is_untracked_pat_range		= is_ISA_range,
 	.nmi_init			= default_nmi_init,
 	.get_nmi_reason			= default_get_nmi_reason,
-	.i8042_detect			= default_i8042_detect,
 	.save_sched_clock_state 	= tsc_save_sched_clock_state,
 	.restore_sched_clock_state 	= tsc_restore_sched_clock_state,
 };
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 821cb41f00e6..ce4b06733c09 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -23,11 +23,6 @@
 #include <asm/io_apic.h>
 #include <asm/emergency-restart.h>
 
-static int ce4100_i8042_detect(void)
-{
-	return 0;
-}
-
 /*
  * The CE4100 platform has an internal 8051 Microcontroller which is
  * responsible for signaling to the external Power Management Unit the
@@ -145,7 +140,6 @@ static void sdv_pci_init(void)
 void __init x86_ce4100_early_setup(void)
 {
 	x86_init.oem.arch_setup = sdv_arch_setup;
-	x86_platform.i8042_detect = ce4100_i8042_detect;
 	x86_init.resources.probe_roms = x86_init_noop;
 	x86_init.mpparse.get_smp_config = x86_init_uint_noop;
 	x86_init.mpparse.find_smp_config = x86_init_noop;
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index dd6cfa4ad3ac..61b5ed2b7d40 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -19,7 +19,7 @@ obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o
 # I2C Devices
 obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
 obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
-obj-$(subst m,y,$(CONFIG_INPUT_MPU3050)) += platform_mpu3050.o
+obj-$(subst m,y,$(CONFIG_MPU3050_I2C)) += platform_mpu3050.o
 obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o
 obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o
 # I2C GPIO Expanders
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 7850128f0026..12a272582cdc 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -161,12 +161,6 @@ static void intel_mid_arch_setup(void)
 	regulator_has_full_constraints();
 }
 
-/* MID systems don't have i8042 controller */
-static int intel_mid_i8042_detect(void)
-{
-	return 0;
-}
-
 /*
  * Moorestown does not have external NMI source nor port 0x61 to report
  * NMI status. The possible NMI sources are from pmu as a result of NMI
@@ -197,7 +191,6 @@ void __init x86_intel_mid_early_setup(void)
 	x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
 
 	x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
-	x86_platform.i8042_detect = intel_mid_i8042_detect;
 	x86_init.timers.wallclock_init = intel_mid_rtc_init;
 	x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
 
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index f5bad40936ac..b8f562049cad 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -25,7 +25,8 @@
  * @fmt:	format string.
  * ...		variadic argument list.
  */
-static void __init imr_self_test_result(int res, const char *fmt, ...)
+static __printf(2, 3)
+void __init imr_self_test_result(int res, const char *fmt, ...)
 {
 	va_list vlist;
 
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 0c2fae8d929d..73eb7fd4aec4 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -992,11 +992,12 @@ static void emit_relocs(int as_text, int use_real_mode)
 		die("Segment relocations found but --realmode not specified\n");
 
 	/* Order the relocations for more efficient processing */
-	sort_relocs(&relocs16);
 	sort_relocs(&relocs32);
 #if ELF_BITS == 64
 	sort_relocs(&relocs32neg);
 	sort_relocs(&relocs64);
+#else
+	sort_relocs(&relocs16);
 #endif
 
 	/* Print the relocations */
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 73a4e68448fc..77551f522202 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -983,7 +983,11 @@ static int __init i8042_pnp_init(void)
 #if defined(__ia64__)
 		return -ENODEV;
 #else
-		pr_info("PNP: No PS/2 controller found. Probing ports directly.\n");
+		pr_info("PNP: No PS/2 controller found.\n");
+		if (x86_platform.legacy.i8042 !=
+				X86_LEGACY_I8042_EXPECTED_PRESENT)
+			return -ENODEV;
+		pr_info("Probing ports directly.\n");
 		return 0;
 #endif
 	}
@@ -1070,8 +1074,8 @@ static int __init i8042_platform_init(void)
 
 #ifdef CONFIG_X86
 	u8 a20_on = 0xdf;
-	/* Just return if pre-detection shows no i8042 controller exist */
-	if (!x86_platform.i8042_detect())
+	/* Just return if platform does not have i8042 controller */
+	if (x86_platform.legacy.i8042 == X86_LEGACY_I8042_PLATFORM_ABSENT)
 		return -ENODEV;
 #endif
 

* [GIT PULL] x86 fixes
@ 2016-12-07 18:53 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-12-07 18:53 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 55f856e640560494518eaf24fe9d2d2089fba71a x86/unwind: Fix guess-unwinder regression

Misc fixes: a core dumping crash fix, a guess-unwinder regression fix, plus three 
build warning fixes.

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/platform/olpc: Fix resume handler build warning

Dan Carpenter (1):
      x86/apic/uv: Silence a shift wrapping warning

Dmitry Safonov (1):
      x86/coredump: Always use user_regs_struct for compat_elf_gregset_t

Josh Poimboeuf (1):
      x86/unwind: Fix guess-unwinder regression

Peter Foley (1):
      x86/build: Annotate die() with noreturn to fix build warning on clang


 arch/x86/include/asm/compat.h          | 4 +---
 arch/x86/kernel/apic/x2apic_uv_x.c     | 4 ++--
 arch/x86/kernel/unwind_guess.c         | 9 ++++++---
 arch/x86/platform/olpc/olpc-xo15-sci.c | 2 ++
 arch/x86/tools/relocs.h                | 2 +-
 5 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 03d269bed941..24118c0b4640 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -272,7 +272,6 @@ struct compat_shmid64_ds {
 /*
  * The type of struct elf_prstatus.pr_reg in compatible core dumps.
  */
-#ifdef CONFIG_X86_X32_ABI
 typedef struct user_regs_struct compat_elf_gregset_t;
 
 /* Full regset -- prstatus on x32, otherwise on ia32 */
@@ -281,10 +280,9 @@ typedef struct user_regs_struct compat_elf_gregset_t;
   do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \
   while (0)
 
+#ifdef CONFIG_X86_X32_ABI
 #define COMPAT_USE_64BIT_TIME \
 	(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
-#else
-typedef struct user_regs_struct32 compat_elf_gregset_t;
 #endif
 
 /*
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index aeef53ce93e1..35690a168cf7 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -815,9 +815,9 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
 				l = li;
 			}
 			addr1 = (base << shift) +
-				f * (unsigned long)(1 << m_io);
+				f * (1ULL << m_io);
 			addr2 = (base << shift) +
-				(l + 1) * (unsigned long)(1 << m_io);
+				(l + 1) * (1ULL << m_io);
 			pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
 				id, fi, li, lnasid, addr1, addr2);
 			if (max_io < l)
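
The bug here is the classic shift-before-widen: in
(unsigned long)(1 << m_io) the shift happens in 32-bit int and only the
already-truncated result is widened, so any m_io >= 31 produces garbage.
Reduced:

    unsigned int m_io = 36;

    unsigned long bad  = (unsigned long)(1 << m_io); /* 32-bit shift: undefined */
    unsigned long good = 1ULL << m_io;               /* widened, then shifted   */
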
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index b80e8bf43cc6..22881ddcbb9f 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -7,11 +7,13 @@
 
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
-	unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+	unsigned long addr;
 
 	if (unwind_done(state))
 		return 0;
 
+	addr = READ_ONCE_NOCHECK(*state->sp);
+
 	return ftrace_graph_ret_addr(state->task, &state->graph_idx,
 				     addr, state->sp);
 }
@@ -25,11 +27,12 @@ bool unwind_next_frame(struct unwind_state *state)
 		return false;
 
 	do {
-		unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+		for (state->sp++; state->sp < info->end; state->sp++) {
+			unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
 
-		for (state->sp++; state->sp < info->end; state->sp++)
 			if (__kernel_text_address(addr))
 				return true;
+		}
 
 		state->sp = info->next_sp;
 
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 55130846ac87..c0533fbc39e3 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -196,6 +196,7 @@ static int xo15_sci_remove(struct acpi_device *device)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int xo15_sci_resume(struct device *dev)
 {
 	/* Enable all EC events */
@@ -207,6 +208,7 @@ static int xo15_sci_resume(struct device *dev)
 
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(xo15_sci_pm, NULL, xo15_sci_resume);
 
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index f59590645b68..1d23bf953a4a 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -16,7 +16,7 @@
 #include <regex.h>
 #include <tools/le_byteshift.h>
 
-void die(char *fmt, ...);
+void die(char *fmt, ...) __attribute__((noreturn));
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
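
Annotating die() as noreturn lets the compiler prove that paths ending in
die() never continue, which silences warnings of the may-be-used-uninitialized
family. A standalone illustration (parse_flag() is hypothetical, not from
relocs.c):

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    void die(char *fmt, ...) __attribute__((noreturn));

    void die(char *fmt, ...)
    {
        va_list ap;

        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        va_end(ap);
        exit(1);
    }

    static int parse_flag(const char *s)
    {
        int val;

        if (!strcmp(s, "on"))
            val = 1;
        else if (!strcmp(s, "off"))
            val = 0;
        else
            die("bad flag: %s\n", s);

        return val;     /* fine: the die() path cannot fall through */
    }
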
 

* [GIT PULL] x86 fixes
@ 2016-11-22 15:41 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-11-22 15:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: e5dce2868818ca8706924f7bdc7939d481eefab0 x86/platform/intel-mid: Rename platform_wdt to platform_mrfld_wdt

Misc fixes:

 - two fixes to make (very) old Intel CPUs boot reliably
 - fix the intel-mid driver and rename it
 - two KASAN false positive fixes
 - an FPU fix
 - two sysfb fixes
 - two build fixes related to new toolchain versions

 Thanks,

	Ingo

------------------>
Andy Lutomirski (2):
      x86/traps: Ignore high word of regs->cs in early_fixup_exception()
      x86/boot: Fail the boot if !M486 and CPUID is missing

Andy Shevchenko (2):
      x86/platform/intel-mid: Register watchdog device after SCU
      x86/platform/intel-mid: Rename platform_wdt to platform_mrfld_wdt

Arnd Bergmann (1):
      x86/boot: Avoid warning for zero-filling .bss

David Herrmann (2):
      x86/sysfb: Add support for 64bit EFI lfb_base
      x86/sysfb: Fix lfb_size calculation

H.J. Lu (1):
      x86/build: Build compressed x86 kernels as PIE when !CONFIG_RELOCATABLE as well

Josh Poimboeuf (2):
      x86/unwind: Prevent KASAN false positive warnings in guess unwinder
      x86/dumpstack: Prevent KASAN false positive warnings

Yu-cheng Yu (1):
      x86/fpu: Fix invalid FPU ptrace state after execve()


 arch/x86/boot/compressed/Makefile                  |  5 ++-
 arch/x86/boot/cpu.c                                |  6 ++++
 arch/x86/kernel/dumpstack.c                        |  2 +-
 arch/x86/kernel/fpu/core.c                         | 16 ++++-----
 arch/x86/kernel/head_32.S                          |  9 +++--
 arch/x86/kernel/sysfb_simplefb.c                   | 39 +++++++++++++++++-----
 arch/x86/kernel/unwind_guess.c                     |  8 +++--
 arch/x86/mm/extable.c                              |  7 +++-
 arch/x86/platform/intel-mid/device_libs/Makefile   |  2 +-
 .../{platform_wdt.c => platform_mrfld_wdt.c}       | 34 +++++++++++++++----
 10 files changed, 95 insertions(+), 33 deletions(-)
 rename arch/x86/platform/intel-mid/device_libs/{platform_wdt.c => platform_mrfld_wdt.c} (65%)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 536ccfcc01c6..34d9e15857c3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -40,8 +40,8 @@ GCOV_PROFILE := n
 UBSAN_SANITIZE :=n
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
-ifeq ($(CONFIG_RELOCATABLE),y)
-# If kernel is relocatable, build compressed kernel as PIE.
+# Compressed kernel should be built as PIE since it may be loaded at any
+# address by the bootloader.
 ifeq ($(CONFIG_X86_32),y)
 LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
 else
@@ -51,7 +51,6 @@ else
 LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
 	&& echo "-z noreloc-overflow -pie --no-dynamic-linker")
 endif
-endif
 LDFLAGS_vmlinux := -T
 
 hostprogs-y	:= mkpiggy
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 26240dde081e..4224ede43b4e 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -87,6 +87,12 @@ int validate_cpu(void)
 		return -1;
 	}
 
+	if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) &&
+	    !has_eflag(X86_EFLAGS_ID)) {
+		printf("This kernel requires a CPU with the CPUID instruction.  Build with CONFIG_M486=y to run on this CPU.\n");
+		return -1;
+	}
+
 	if (err_flags) {
 		puts("This kernel requires the following features "
 		     "not present on the CPU:\n");
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9b7cf5c28f5f..85f854b98a9d 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -112,7 +112,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		for (; stack < stack_info.end; stack++) {
 			unsigned long real_addr;
 			int reliable = 0;
-			unsigned long addr = *stack;
+			unsigned long addr = READ_ONCE_NOCHECK(*stack);
 			unsigned long *ret_addr_p =
 				unwind_get_return_address_ptr(&state);
 
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 47004010ad5d..ebb4e95fbd74 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -521,14 +521,14 @@ void fpu__clear(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
-		/* FPU state will be reallocated lazily at the first use. */
-		fpu__drop(fpu);
-	} else {
-		if (!fpu->fpstate_active) {
-			fpu__activate_curr(fpu);
-			user_fpu_begin();
-		}
+	fpu__drop(fpu);
+
+	/*
+	 * Make sure fpstate is cleared and initialized.
+	 */
+	if (static_cpu_has(X86_FEATURE_FPU)) {
+		fpu__activate_curr(fpu);
+		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
 	}
 }
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b6b2f0264af3..2dabea46f039 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -665,14 +665,17 @@ __PAGE_ALIGNED_BSS
 initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 #else
-ENTRY(initial_page_table)
+.globl initial_page_table
+initial_page_table:
 	.fill 1024,4,0
 #endif
 initial_pg_fixmap:
 	.fill 1024,4,0
-ENTRY(empty_zero_page)
+.globl empty_zero_page
+empty_zero_page:
 	.fill 4096,1,0
-ENTRY(swapper_pg_dir)
+.globl swapper_pg_dir
+swapper_pg_dir:
 	.fill 1024,4,0
 EXPORT_SYMBOL(empty_zero_page)
 
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
index 764a29f84de7..85195d447a92 100644
--- a/arch/x86/kernel/sysfb_simplefb.c
+++ b/arch/x86/kernel/sysfb_simplefb.c
@@ -66,13 +66,36 @@ __init int create_simplefb(const struct screen_info *si,
 {
 	struct platform_device *pd;
 	struct resource res;
-	unsigned long len;
+	u64 base, size;
+	u32 length;
 
-	/* don't use lfb_size as it may contain the whole VMEM instead of only
-	 * the part that is occupied by the framebuffer */
-	len = mode->height * mode->stride;
-	len = PAGE_ALIGN(len);
-	if (len > (u64)si->lfb_size << 16) {
+	/*
+	 * If the 64BIT_BASE capability is set, ext_lfb_base will contain the
+	 * upper half of the base address. Assemble the address, then make sure
+	 * it is valid and we can actually access it.
+	 */
+	base = si->lfb_base;
+	if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+		base |= (u64)si->ext_lfb_base << 32;
+	if (!base || (u64)(resource_size_t)base != base) {
+		printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Don't use lfb_size as IORESOURCE size, since it may contain the
+	 * entire VMEM, and thus require huge mappings. Use just the part we
+	 * need, that is, the part where the framebuffer is located. But verify
+	 * that it does not exceed the advertised VMEM.
+	 * Note that in case of VBE, the lfb_size is shifted by 16 bits for
+	 * historical reasons.
+	 */
+	size = si->lfb_size;
+	if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
+		size <<= 16;
+	length = mode->height * mode->stride;
+	length = PAGE_ALIGN(length);
+	if (length > size) {
 		printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
 		return -EINVAL;
 	}
@@ -81,8 +104,8 @@ __init int create_simplefb(const struct screen_info *si,
 	memset(&res, 0, sizeof(res));
 	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	res.name = simplefb_resname;
-	res.start = si->lfb_base;
-	res.end = si->lfb_base + len - 1;
+	res.start = base;
+	res.end = res.start + length - 1;
 	if (res.end <= res.start)
 		return -EINVAL;
 
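A user-space demonstration of the truncation check above (illustrative
types and values: on a 32-bit kernel without 64-bit phys_addr_t,
resource_size_t is 32 bits wide, so a framebuffer based above 4 GiB has
to be rejected rather than silently truncated):

	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t fake_resource_size_t;	/* 32-bit resource_size_t */

	int main(void)
	{
		/* ext_lfb_base = 0x1, lfb_base = 0xe0000000 */
		uint64_t base = ((uint64_t)0x1 << 32) | 0xe0000000;

		if ((uint64_t)(fake_resource_size_t)base != base)
			printf("inaccessible VRAM base: %#llx truncates\n",
			       (unsigned long long)base);
		return 0;
	}
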
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 2d721e533cf4..b80e8bf43cc6 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -7,11 +7,13 @@
 
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
+	unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+
 	if (unwind_done(state))
 		return 0;
 
 	return ftrace_graph_ret_addr(state->task, &state->graph_idx,
-				     *state->sp, state->sp);
+				     addr, state->sp);
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
@@ -23,8 +25,10 @@ bool unwind_next_frame(struct unwind_state *state)
 		return false;
 
 	do {
+		unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+
 		for (state->sp++; state->sp < info->end; state->sp++)
-			if (__kernel_text_address(*state->sp))
+			if (__kernel_text_address(addr))
 				return true;
 
 		state->sp = info->next_sp;
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 79ae939970d3..fcd06f7526de 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -135,7 +135,12 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	if (early_recursion_flag > 2)
 		goto halt_loop;
 
-	if (regs->cs != __KERNEL_CS)
+	/*
+	 * Old CPUs leave the high bits of CS on the stack
+	 * undefined.  I'm not sure which CPUs do this, but at least
+	 * the 486 DX works this way.
+	 */
+	if ((regs->cs & 0xFFFF) != __KERNEL_CS)
 		goto fail;
 
 	/*
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index 429d08be7848..dd6cfa4ad3ac 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -28,4 +28,4 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
 obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
 # MISC Devices
 obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
-obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
similarity index 65%
rename from arch/x86/platform/intel-mid/device_libs/platform_wdt.c
rename to arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index de734134bc8d..3f1f1c77d090 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -1,5 +1,5 @@
 /*
- * platform_wdt.c: Watchdog platform library file
+ * Intel Merrifield watchdog platform device library file
  *
  * (C) Copyright 2014 Intel Corporation
  * Author: David Cohen <david.a.cohen@linux.intel.com>
@@ -14,7 +14,9 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/intel-mid_wdt.h>
+
 #include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
 #include <asm/io_apic.h>
 
 #define TANGIER_EXT_TIMER0_MSI 15
@@ -50,14 +52,34 @@ static struct intel_mid_wdt_pdata tangier_pdata = {
 	.probe = tangier_probe,
 };
 
-static int __init register_mid_wdt(void)
+static int wdt_scu_status_change(struct notifier_block *nb,
+				 unsigned long code, void *data)
 {
-	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) {
-		wdt_dev.dev.platform_data = &tangier_pdata;
-		return platform_device_register(&wdt_dev);
+	if (code == SCU_DOWN) {
+		platform_device_unregister(&wdt_dev);
+		return 0;
 	}
 
-	return -ENODEV;
+	return platform_device_register(&wdt_dev);
 }
 
+static struct notifier_block wdt_scu_notifier = {
+	.notifier_call	= wdt_scu_status_change,
+};
+
+static int __init register_mid_wdt(void)
+{
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return -ENODEV;
+
+	wdt_dev.dev.platform_data = &tangier_pdata;
+
+	/*
+	 * We need to be sure that the SCU IPC is ready before the watchdog
+	 * device can be registered:
+	 */
+	intel_scu_notifier_add(&wdt_scu_notifier);
+
+	return 0;
+}
 rootfs_initcall(register_mid_wdt);

* [GIT PULL] x86 fixes
@ 2016-11-14  8:03 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-11-14  8:03 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: d49597fd3bc7d9534de55e9256767f073be1b33a x86/cpu: Deal with broken firmware (VMWare/XEN)

Misc fixes:

 - fix an Intel/MID boot crash/hang bug
 - fix a cache topology mis-parsing bug on certain AMD CPUs
 - fix a virtualization firmware bug by adding a check+quirk workaround on the kernel side

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/pci/pci-mid.c              # e8a6123e9ead: x86/platform/intel-mid: Retr

 Thanks,

	Ingo

------------------>
Lukas Wunner (1):
      x86/platform/intel-mid: Retrofit pci_platform_pm_ops ->get_state hook

Thomas Gleixner (1):
      x86/cpu: Deal with broken firmware (VMWare/XEN)

Yazen Ghannam (1):
      x86/cpu/AMD: Fix cpu_llc_id for AMD Fam17h systems


 arch/x86/include/asm/intel-mid.h  |  1 +
 arch/x86/kernel/cpu/amd.c         |  6 +-----
 arch/x86/kernel/cpu/common.c      | 32 ++++++++++++++++++++++++++++++--
 arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++
 drivers/pci/pci-mid.c             |  6 ++++++
 5 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 5b6753d1f7f4..49da9f497b90 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -17,6 +17,7 @@
 
 extern int intel_mid_pci_init(void);
 extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
+extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev);
 
 extern void intel_mid_pwr_power_off(void);
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b81fe2d63e15..1e81a37c034e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -347,7 +347,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
 	unsigned bits;
 	int cpu = smp_processor_id();
-	unsigned int socket_id, core_complex_id;
 
 	bits = c->x86_coreid_bits;
 	/* Low order bits define the core id (index of core in socket) */
@@ -365,10 +364,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 	 if (c->x86 != 0x17 || !cpuid_edx(0x80000006))
 		return;
 
-	socket_id	= (c->apicid >> bits) - 1;
-	core_complex_id	= (c->apicid & ((1 << bits) - 1)) >> 3;
-
-	per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id;
+	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 #endif
 }
 
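Worked through with invented numbers: on Fam17h up to eight threads of a
core complex share one L3, so the last-level-cache id is simply the APIC
id with the low three bits dropped (hypothetical helper, same math as
the hunk above):

	/* apicid 0x00..0x07 -> llc 0, apicid 0x08..0x0f -> llc 1, ... */
	static unsigned int fam17h_llc_id(unsigned int apicid)
	{
		return apicid >> 3;
	}
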
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bd910a7dd0a..cc9e980c68ec 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -979,6 +979,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c)
 }
 
 /*
+ * The physical to logical package id mapping is initialized from the
+ * acpi/mptables information. Make sure that CPUID actually agrees with
+ * that.
+ */
+static void sanitize_package_id(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	unsigned int pkg, apicid, cpu = smp_processor_id();
+
+	apicid = apic->cpu_present_to_apicid(cpu);
+	pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+	if (apicid != c->initial_apicid) {
+		pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n",
+		       cpu, apicid, c->initial_apicid);
+		c->initial_apicid = apicid;
+	}
+	if (pkg != c->phys_proc_id) {
+		pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n",
+		       cpu, pkg, c->phys_proc_id);
+		c->phys_proc_id = pkg;
+	}
+	c->logical_proc_id = topology_phys_to_logical_pkg(pkg);
+#else
+	c->logical_proc_id = 0;
+#endif
+}
+
+/*
  * This does the hard work of actually picking apart the CPU stuff...
  */
 static void identify_cpu(struct cpuinfo_x86 *c)
@@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
-	/* The boot/hotplug time assigment got cleared, restore it */
-	c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
+	sanitize_package_id(c);
 }
 
 /*
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index 5d3b45ad1c03..67375dda451c 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
 }
 EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
 
+pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	struct mid_pwr *pwr = midpwr;
+	int id, reg, bit;
+	u32 power;
+
+	if (!pwr || !pwr->available)
+		return PCI_UNKNOWN;
+
+	id = intel_mid_pwr_get_lss_id(pdev);
+	if (id < 0)
+		return PCI_UNKNOWN;
+
+	reg = (id * LSS_PWS_BITS) / 32;
+	bit = (id * LSS_PWS_BITS) % 32;
+	power = mid_pwr_get_state(pwr, reg);
+	return (__force pci_power_t)((power >> bit) & 3);
+}
+
 void intel_mid_pwr_power_off(void)
 {
 	struct mid_pwr *pwr = midpwr;
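
Assuming LSS_PWS_BITS == 2 (two power-state bits per logical subsystem),
the register/bit arithmetic above unpacks as in this sketch with an
invented id (hypothetical helpers, not driver code):

	/* id 21: bit offset 42 -> register 1, bit 10; state = (reg >> 10) & 3 */
	static unsigned int lss_reg(int id) { return (id * 2) / 32; }
	static unsigned int lss_bit(int id) { return (id * 2) % 32; }
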
diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
index 55f453de562e..c7f3408e3148 100644
--- a/drivers/pci/pci-mid.c
+++ b/drivers/pci/pci-mid.c
@@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
 	return intel_mid_pci_set_power_state(pdev, state);
 }
 
+static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	return intel_mid_pci_get_power_state(pdev);
+}
+
 static pci_power_t mid_pci_choose_state(struct pci_dev *pdev)
 {
 	return PCI_D3hot;
@@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev *dev)
 static struct pci_platform_pm_ops mid_pci_platform_pm = {
 	.is_manageable	= mid_pci_power_manageable,
 	.set_state	= mid_pci_set_power_state,
+	.get_state	= mid_pci_get_power_state,
 	.choose_state	= mid_pci_choose_state,
 	.sleep_wake	= mid_pci_sleep_wake,
 	.run_wake	= mid_pci_run_wake,

* [GIT PULL] x86 fixes
@ 2016-10-28  8:41 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-10-28  8:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton, Borislav Petkov

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 1c27f646b18fb56308dff82784ca61951bad0b48 x86/microcode/AMD: Fix more fallout from CONFIG_RANDOMIZE_MEMORY=y

Misc fixes: three build fixes, an unwinder fix and a microcode loader fix.

 Thanks,

	Ingo

------------------>
Arnd Bergmann (1):
      x86/quirks: Hide maybe-uninitialized warning

Borislav Petkov (1):
      x86/microcode/AMD: Fix more fallout from CONFIG_RANDOMIZE_MEMORY=y

Jan Beulich (1):
      x86/build: Fix build with older GCC versions

Josh Poimboeuf (1):
      x86/unwind: Fix empty stack dereference in guess unwinder

Steven Rostedt (1):
      x86: Fix export for mcount and __fentry__


 arch/x86/entry/Makefile             | 4 ++--
 arch/x86/kernel/cpu/microcode/amd.c | 2 +-
 arch/x86/kernel/mcount_64.S         | 3 ++-
 arch/x86/kernel/quirks.c            | 3 +--
 arch/x86/kernel/unwind_guess.c      | 9 ++++++++-
 5 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 77f28ce9c646..9976fcecd17e 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,8 +5,8 @@
 OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
-CFLAGS_syscall_64.o		+= -Wno-override-init
-CFLAGS_syscall_32.o		+= -Wno-override-init
+CFLAGS_syscall_64.o		+= $(call cc-option,-Wno-override-init,)
+CFLAGS_syscall_32.o		+= $(call cc-option,-Wno-override-init,)
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o
 
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 620ab06bcf45..017bda12caae 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -429,7 +429,7 @@ int __init save_microcode_in_initrd_amd(void)
 	 * We need the physical address of the container for both bitness since
 	 * boot_params.hdr.ramdisk_image is a physical address.
 	 */
-	cont    = __pa(container);
+	cont    = __pa_nodebug(container);
 	cont_va = container;
 #endif
 
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index efe73aacf966..7b0d3da52fb4 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -18,8 +18,10 @@
 
 #ifdef CC_USING_FENTRY
 # define function_hook	__fentry__
+EXPORT_SYMBOL(__fentry__)
 #else
 # define function_hook	mcount
+EXPORT_SYMBOL(mcount)
 #endif
 
 /* All cases save the original rbp (8 bytes) */
@@ -295,7 +297,6 @@ GLOBAL(ftrace_stub)
 	jmp fgraph_trace
 END(function_hook)
 #endif /* CONFIG_DYNAMIC_FTRACE */
-EXPORT_SYMBOL(function_hook)
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 51402a7e4ca6..0bee04d41bed 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -625,8 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
 			amd_disable_seq_and_redirect_scrub);
 
-#endif
-
 #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
 #include <linux/jump_label.h>
 #include <asm/string_64.h>
@@ -657,3 +655,4 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
 #endif
+#endif
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 9298993dc8b7..2d721e533cf4 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -47,7 +47,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	get_stack_info(first_frame, state->task, &state->stack_info,
 		       &state->stack_mask);
 
-	if (!__kernel_text_address(*first_frame))
+	/*
+	 * The caller can provide the address of the first frame directly
+	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+	 * to start unwinding at.  Skip ahead until we reach it.
+	 */
+	if (!unwind_done(state) &&
+	    (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+	    !__kernel_text_address(*first_frame)))
 		unwind_next_frame(state);
 }
 EXPORT_SYMBOL_GPL(__unwind_start);

* [GIT PULL] x86 fixes
@ 2016-10-22 11:16 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-10-22 11:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ff8560512b8d4b7ca3ef4fd69166634ac30b2525 x86/boot/smp: Don't try to poke disabled/non-existent APIC

Three fixes, a hw-enablement and a cross-arch fix/enablement change:

 - SGI/UV fix for older platforms

 - x32 signal handling fix

 - older x86 platform bootup APIC fix

 - AVX512-4VNNIW (Neural Network Instructions) and AVX512-4FMAPS (Multiply 
   Accumulation Single precision instructions) enablement.

 - move thread_info back into x86 specific code, to make life easier for other 
   architectures trying to make use of CONFIG_THREAD_INFO_IN_TASK_STRUCT=y.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/thread_info.h        # c8061485a0d7: sched/core, x86: Make struct

 Thanks,

	Ingo

------------------>
Alex Thorlton (1):
      x86/platform/UV: Fix support for EFI_OLD_MEMMAP after BIOS callback updates

Dmitry Safonov (1):
      x86/signal: Remove bogus user_64bit_mode() check from sigaction_compat_abi()

Heiko Carstens (1):
      sched/core, x86: Make struct thread_info arch specific again

Piotr Luc (1):
      x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

Renat Valiullin (1):
      x86/vmware: Skip timer_irq_works() check on VMware

Ville Syrjälä (1):
      x86/boot/smp: Don't try to poke disabled/non-existent APIC


 arch/x86/include/asm/cpufeatures.h       |  2 ++
 arch/x86/include/asm/thread_info.h       |  9 +++++++++
 arch/x86/kernel/cpu/scattered.c          |  2 ++
 arch/x86/kernel/cpu/vmware.c             |  5 +++++
 arch/x86/kernel/fpu/xstate.c             |  2 ++
 arch/x86/kernel/signal_compat.c          |  3 ---
 arch/x86/kernel/smpboot.c                | 16 +++++++++-------
 arch/x86/platform/uv/bios_uv.c           | 10 +++++++++-
 include/linux/thread_info.h              | 11 -----------
 tools/arch/x86/include/asm/cpufeatures.h |  2 ++
 10 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1188bc849ee3..a39629206864 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2aaca53c0974..ad6f5eb07a95 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -52,6 +52,15 @@ struct task_struct;
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
+struct thread_info {
+	unsigned long		flags;		/* low level flags */
+};
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.flags		= 0,			\
+}
+
 #define init_stack		(init_thread_union.stack)
 
 #else /* !__ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df9398d..1db8dc490b66 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 
 	static const struct cpuid_bit cpuid_bits[] = {
 		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
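
The two new entries map straight to CPUID leaf 0x7 (subleaf 0), EDX bits
2 and 3. The same check from user space, as a sketch (assumes GCC's or
Clang's <cpuid.h>):

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int a, b, c, d;

		if (__get_cpuid_max(0, NULL) < 7)
			return 1;

		__cpuid_count(7, 0, a, b, c, d);
		printf("AVX512_4VNNIW: %s\n", (d & (1u << 2)) ? "yes" : "no");
		printf("AVX512_4FMAPS: %s\n", (d & (1u << 3)) ? "yes" : "no");
		return 0;
	}
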
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 81160578b91a..5130985b758b 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -27,6 +27,7 @@
 #include <asm/div64.h>
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
+#include <asm/timer.h>
 #include <asm/apic.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
@@ -94,6 +95,10 @@ static void __init vmware_platform_setup(void)
 	} else {
 		pr_warn("Failed to get TSC freq from the hypervisor\n");
 	}
+
+#ifdef CONFIG_X86_IO_APIC
+	no_timer_check = 1;
+#endif
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 124aa5c593f8..095ef7ddd6ae 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 40df33753bae..ec1f756f9dc9 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -105,9 +105,6 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
 	/* Don't let flags to be set from userspace */
 	act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
 
-	if (user_64bit_mode(current_pt_regs()))
-		return;
-
 	if (in_ia32_syscall())
 		act->sa.sa_flags |= SA_IA32_ABI;
 	if (in_x32_syscall())
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 951f093a96fe..42f5eb7b4f6c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1409,15 +1409,17 @@ __init void prefill_possible_map(void)
 
 	/* No boot processor was found in mptable or ACPI MADT */
 	if (!num_processors) {
-		int apicid = boot_cpu_physical_apicid;
-		int cpu = hard_smp_processor_id();
+		if (boot_cpu_has(X86_FEATURE_APIC)) {
+			int apicid = boot_cpu_physical_apicid;
+			int cpu = hard_smp_processor_id();
 
-		pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
+			pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
 
-		/* Make sure boot cpu is enumerated */
-		if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
-		    apic->apic_id_valid(apicid))
-			generic_processor_info(apicid, boot_cpu_apic_version);
+			/* Make sure boot cpu is enumerated */
+			if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
+			    apic->apic_id_valid(apicid))
+				generic_processor_info(apicid, boot_cpu_apic_version);
+		}
 
 		if (!num_processors)
 			num_processors = 1;
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index b4d5e95fe4df..4a6a5a26c582 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -40,7 +40,15 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 		 */
 		return BIOS_STATUS_UNIMPLEMENTED;
 
-	ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+	/*
+	 * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
+	 * callback method, which uses efi_call() directly, with the kernel page tables:
+	 */
+	if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags)))
+		ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
+	else
+		ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(uv_bios_call);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 45f004e9cc59..2873baf5372a 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -14,17 +14,6 @@ struct timespec;
 struct compat_timespec;
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-struct thread_info {
-	unsigned long		flags;		/* low level flags */
-};
-
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.flags		= 0,			\
-}
-#endif
-
-#ifdef CONFIG_THREAD_INFO_IN_TASK
 #define current_thread_info() ((struct thread_info *)current)
 #endif
 
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1188bc849ee3..a39629206864 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */

* [GIT PULL] x86 fixes
@ 2016-10-18 11:22 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-10-18 11:22 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 0e60439c22ec551e9b5780eba73ddfeb63ab63d9 Merge branch 'mm/pkeys' into x86/urgent, to pick up pkeys fix

Misc fixes, plus hw-enablement changes:

 - fix persistent RAM handling
 - remove pkeys warning
 - remove duplicate macro
 - fix debug warning in irq handler
 - Add new 'Knights Mill' CPU related constants and enable the perf bits

 Thanks,

	Ingo

------------------>
Dan Williams (1):
      x86/e820: Don't merge consecutive E820_PRAM ranges

Dave Jones (1):
      pkeys: Remove easily triggered WARN

Longpeng(Mike) (1):
      x86: Remove duplicate rtit status MSR macro

Piotr Luc (4):
      x86/cpu/intel: Add Knights Mill to Intel family
      perf/x86/intel: Add Knights Mill CPUID
      perf/x86/intel/rapl: Add Knights Mill CPUID
      perf/x86/intel/uncore: Add Knights Mill CPUID

Wanpeng Li (1):
      x86/smp: Add irq_enter/exit() in smp_reschedule_interrupt()


 arch/x86/events/intel/core.c        | 3 ++-
 arch/x86/events/intel/rapl.c        | 1 +
 arch/x86/events/intel/uncore.c      | 1 +
 arch/x86/include/asm/intel-family.h | 1 +
 arch/x86/include/asm/msr-index.h    | 1 -
 arch/x86/kernel/e820.c              | 2 +-
 arch/x86/kernel/smp.c               | 2 ++
 7 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a3a9eb84b5cf..eab0915f5995 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3898,6 +3898,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
 		memcpy(hw_cache_event_ids,
 		       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs,
@@ -3912,7 +3913,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
-		pr_cont("Knights Landing events, ");
+		pr_cont("Knights Landing/Mill events, ");
 		break;
 
 	case INTEL_FAM6_SKYLAKE_MOBILE:
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index b0f0e835a770..0a535cea8ff3 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -763,6 +763,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d9844cc74486..efca2685d876 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1349,6 +1349,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,	  bdx_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL,	  knl_uncore_init),
+	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM,	  knl_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,      skx_uncore_init),
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 9ae5ab80a497..34a46dc076d3 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -64,5 +64,6 @@
 /* Xeon Phi */
 
 #define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
+#define INTEL_FAM6_XEON_PHI_KNM		0x85 /* Knights Mill */
 
 #endif /* _ASM_X86_INTEL_FAMILY_H */
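
A sketch of how such a model constant is typically consumed (hypothetical
table; the perf hunks below use local wrapper macros of the same shape):

	#include <asm/cpu_device_id.h>
	#include <asm/intel-family.h>

	static const struct x86_cpu_id knm_ids[] = {
		{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM, X86_FEATURE_ANY },
		{}
	};

	static bool is_knights_mill(void)
	{
		return x86_match_cpu(knm_ids) != NULL;
	}
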
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c6676b29..78f3760ca1f2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -88,7 +88,6 @@
 
 #define MSR_IA32_RTIT_CTL		0x00000570
 #define MSR_IA32_RTIT_STATUS		0x00000571
-#define MSR_IA32_RTIT_STATUS		0x00000571
 #define MSR_IA32_RTIT_ADDR0_A		0x00000580
 #define MSR_IA32_RTIT_ADDR0_B		0x00000581
 #define MSR_IA32_RTIT_ADDR1_A		0x00000582
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index b85fe5f91c3f..90e8dde3ec26 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -350,7 +350,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
 		 * continue building up new bios map based on this
 		 * information
 		 */
-		if (current_type != last_type) {
+		if (current_type != last_type || current_type == E820_PRAM) {
 			if (last_type != 0)	 {
 				new_bios[new_bios_entry].size =
 					change_point[chgidx]->addr - last_addr;
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 68f8cc222f25..c00cb64bc0a1 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -261,8 +261,10 @@ static inline void __smp_reschedule_interrupt(void)
 
 __visible void smp_reschedule_interrupt(struct pt_regs *regs)
 {
+	irq_enter();
 	ack_APIC_irq();
 	__smp_reschedule_interrupt();
+	irq_exit();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */

* [GIT PULL] x86 fixes
@ 2016-09-13 18:20 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-09-13 18:20 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: c291b015158577be533dd5a959dfc09bab119eed x86/apic: Fix num_processors value in case of failure

Three fixes:

- an AMD microcode loading fix with memory randomization,

- an lguest tooling fix,

- and an APIC enumeration boundary condition fix.

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/microcode/AMD: Fix load of builtin microcode with randomized memory

Daniel Baluta (1):
      tools/lguest: Don't bork the terminal in case of wrong args

Dou Liyang (1):
      x86/apic: Fix num_processors value in case of failure


 arch/x86/kernel/apic/apic.c         |  4 +++-
 arch/x86/kernel/cpu/microcode/amd.c | 13 ++++++++++---
 tools/lguest/lguest.c               |  6 +++---
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 50c95af0f017..f3e9b2df4b16 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2093,7 +2093,6 @@ int generic_processor_info(int apicid, int version)
 		return -EINVAL;
 	}
 
-	num_processors++;
 	if (apicid == boot_cpu_physical_apicid) {
 		/*
 		 * x86_bios_cpu_apicid is required to have processors listed
@@ -2116,10 +2115,13 @@ int generic_processor_info(int apicid, int version)
 
 		pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n",
 			   thiscpu, apicid);
+
 		disabled_cpus++;
 		return -ENOSPC;
 	}
 
+	num_processors++;
+
 	/*
 	 * Validate version
 	 */
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index b816971f5da4..620ab06bcf45 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -54,6 +54,7 @@ static LIST_HEAD(pcache);
  */
 static u8 *container;
 static size_t container_size;
+static bool ucode_builtin;
 
 static u32 ucode_new_rev;
 static u8 amd_ucode_patch[PATCH_MAX_SIZE];
@@ -281,18 +282,22 @@ static bool __init load_builtin_amd_microcode(struct cpio_data *cp,
 void __init load_ucode_amd_bsp(unsigned int family)
 {
 	struct cpio_data cp;
+	bool *builtin;
 	void **data;
 	size_t *size;
 
 #ifdef CONFIG_X86_32
 	data =  (void **)__pa_nodebug(&ucode_cpio.data);
 	size = (size_t *)__pa_nodebug(&ucode_cpio.size);
+	builtin = (bool *)__pa_nodebug(&ucode_builtin);
 #else
 	data = &ucode_cpio.data;
 	size = &ucode_cpio.size;
+	builtin = &ucode_builtin;
 #endif
 
-	if (!load_builtin_amd_microcode(&cp, family))
+	*builtin = load_builtin_amd_microcode(&cp, family);
+	if (!*builtin)
 		cp = find_ucode_in_initrd();
 
 	if (!(cp.data && cp.size))
@@ -373,7 +378,8 @@ void load_ucode_amd_ap(void)
 		return;
 
 	/* Add CONFIG_RANDOMIZE_MEMORY offset. */
-	cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+	if (!ucode_builtin)
+		cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
 
 	eax = cpuid_eax(0x00000001);
 	eq  = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
@@ -439,7 +445,8 @@ int __init save_microcode_in_initrd_amd(void)
 		container = cont_va;
 
 	/* Add CONFIG_RANDOMIZE_MEMORY offset. */
-	container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+	if (!ucode_builtin)
+		container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
 
 	eax   = cpuid_eax(0x00000001);
 	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index d9836c5eb694..11c8d9bc762e 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -3266,6 +3266,9 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	/* If we exit via err(), this kills all the threads, restores tty. */
+	atexit(cleanup_devices);
+
 	/* We always have a console device, and it's always device 1. */
 	setup_console();
 
@@ -3369,9 +3372,6 @@ int main(int argc, char *argv[])
 	/* Ensure that we terminate if a device-servicing child dies. */
 	signal(SIGCHLD, kill_launcher);
 
-	/* If we exit via err(), this kills all the threads, restores tty. */
-	atexit(cleanup_devices);
-
 	/* If requested, chroot to a directory */
 	if (chroot_path) {
 		if (chroot(chroot_path) != 0)

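The ordering matters because err()/errx() terminate through exit(),
which runs atexit() handlers, so the handler must already be registered
before argument parsing can fail. A standalone illustration (not lguest
code):

	#include <err.h>
	#include <stdio.h>
	#include <stdlib.h>

	static void cleanup(void)
	{
		puts("tty state restored");	/* stand-in for cleanup_devices() */
	}

	int main(int argc, char **argv)
	{
		/* Register first: errx() exits via exit(), running handlers. */
		atexit(cleanup);

		if (argc < 2)
			errx(1, "usage: %s <arg>", argv[0]);

		return 0;
	}
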
* [GIT PULL] x86 fixes
@ 2016-08-18 20:49 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-08-18 20:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 7b0501b1e7cddd32b265178e32d332bdfbb532d4 x86/smp: Fix __max_logical_packages value setup

An initrd microcode loading fix and an SMP bootup topology setup fix to resolve
crashes on SGI/UV systems if the BIOS is configured in a certain way.

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/microcode/AMD: Fix initrd loading with CONFIG_RANDOMIZE_MEMORY=y

Jiri Olsa (1):
      x86/smp: Fix __max_logical_packages value setup


 arch/x86/kernel/cpu/microcode/amd.c |  9 ++++++++-
 arch/x86/kernel/smpboot.c           | 25 ++++++++++++++++---------
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 27a0228c9cae..b816971f5da4 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -355,6 +355,7 @@ void load_ucode_amd_ap(void)
 	unsigned int cpu = smp_processor_id();
 	struct equiv_cpu_entry *eq;
 	struct microcode_amd *mc;
+	u8 *cont = container;
 	u32 rev, eax;
 	u16 eq_id;
 
@@ -371,8 +372,11 @@ void load_ucode_amd_ap(void)
 	if (check_current_patch_level(&rev, false))
 		return;
 
+	/* Add CONFIG_RANDOMIZE_MEMORY offset. */
+	cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
 	eax = cpuid_eax(0x00000001);
-	eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+	eq  = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
 
 	eq_id = find_equiv_id(eq, eax);
 	if (!eq_id)
@@ -434,6 +438,9 @@ int __init save_microcode_in_initrd_amd(void)
 	else
 		container = cont_va;
 
+	/* Add CONFIG_RANDOMIZE_MEMORY offset. */
+	container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
 	eax   = cpuid_eax(0x00000001);
 	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
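As an aside, the family computation at the end of the hunk follows the
CPUID convention that the extended-family field is zero unless the base
family is 0xF, which is why the unconditional addition is safe. A worked
example with an invented leaf-1 value (hypothetical helper):

	/* eax = 0x00600f20: base family 0xf + extended family 0x06 = 0x15 */
	static unsigned int cpuid1_family(unsigned int eax)
	{
		return ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
	}
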
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2a6e84a30a54..4296beb8fdd3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 /* Logical package management. We might want to allocate that dynamically */
 static int *physical_to_logical_pkg __read_mostly;
 static unsigned long *physical_package_map __read_mostly;
-static unsigned long *logical_package_map  __read_mostly;
 static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
+static unsigned int logical_packages __read_mostly;
+static bool logical_packages_frozen __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
 int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
 	if (test_and_set_bit(pkg, physical_package_map))
 		goto found;
 
-	new = find_first_zero_bit(logical_package_map, __max_logical_packages);
-	if (new >= __max_logical_packages) {
+	if (logical_packages_frozen) {
 		physical_to_logical_pkg[pkg] = -1;
-		pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+		pr_warn("APIC(%x) Package %u exceeds logical package max\n",
 			apicid, pkg);
 		return -ENOSPC;
 	}
-	set_bit(new, logical_package_map);
+
+	new = logical_packages++;
 	pr_info("APIC(%x) Converting physical %u to logical package %u\n",
 		apicid, pkg, new);
 	physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@ static void __init smp_init_package_map(void)
 	}
 
 	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+	logical_packages = 0;
 
 	/*
 	 * Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@ static void __init smp_init_package_map(void)
 	memset(physical_to_logical_pkg, 0xff, size);
 	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
 	physical_package_map = kzalloc(size, GFP_KERNEL);
-	size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
-	logical_package_map = kzalloc(size, GFP_KERNEL);
-
-	pr_info("Max logical packages: %u\n", __max_logical_packages);
 
 	for_each_present_cpu(cpu) {
 		unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@ static void __init smp_init_package_map(void)
 		set_cpu_possible(cpu, false);
 		set_cpu_present(cpu, false);
 	}
+
+	if (logical_packages > __max_logical_packages) {
+		pr_warn("Detected more packages (%u) than computed by BIOS data (%u).\n",
+			logical_packages, __max_logical_packages);
+		logical_packages_frozen = true;
+		__max_logical_packages  = logical_packages;
+	}
+
+	pr_info("Max logical packages: %u\n", __max_logical_packages);
 }
 
 void __init smp_store_boot_cpu_info(void)

* [GIT PULL] x86 fixes
@ 2016-08-12 19:46 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-08-12 19:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: d52c0569bab4edc888832df44dc7ac28517134f6 x86/apic/x2apic, smp/hotplug: Don't use before alloc in x2apic_cluster_probe()

This is bigger than usual - the reason is partly a pent-up stream of fixes after 
the merge window and partly accidental. The fixes are:

 - 5 patches to fix a boot failure on Andy Lutomirski's laptop.
 - a KASAN fix
 - 4 SGI UV platform fixes
 - a warnings fix
 - a documentation update
 - a swap entry definition fix
 - a pkeys fix
 - an irq stats fix

 Thanks,

	Ingo

------------------>
Aaron Lu (1):
      x86/irq: Do not substract irq_tlb_count from irq_call_count

Alexander Potapenko (1):
      x86, kasan, ftrace: Put APIC interrupt handlers into .irqentry.text

Andy Lutomirski (5):
      x86/boot: Run reserve_bios_regions() after we initialize the memory map
      x86/boot: Synchronize trampoline_cr4_features and mmu_cr4_features directly
      x86/boot: Defer setup_real_mode() to early_initcall time
      x86/boot: Rework reserve_real_mode() to allow multiple tries
      x86/efi: Allocate a trampoline if needed in efi_free_boot_services()

Borislav Petkov (1):
      x86/entry: Clarify the RF saving/restoring situation with SYSCALL/SYSRET

Dave Hansen (2):
      x86/mm/pkeys: Fix compact mode by removing protection keys' XSAVE buffer manipulation
      x86/mm: Fix swap entry comment and macro

Mike Travis (4):
      x86/platform/UV: Fix problem with UV4 Socket IDs not being contiguous
      x86/platform/UV: Fix bug with iounmap() of the UV4 EFI System Table causing a crash
      x86/platform/UV: Fix problem with UV4 BIOS providing incorrect PXM values
      x86/platform/UV: Fix kernel panic running RHEL kdump kernel on UV systems

Nicolas Iooss (1):
      x86/mm/kaslr: Fix -Wformat-security warning

Sebastian Andrzej Siewior (2):
      x86/mm: Disable preemption during CR3 read+write
      x86/apic/x2apic, smp/hotplug: Don't use before alloc in x2apic_cluster_probe()

Thomas Garnier (2):
      x86/mm/KASLR: Fix physical memory calculation on KASLR memory randomization
      x86/mm/KASLR: Increase BRK pages for KASLR memory randomization

Valdis Kletnieks (1):
      x86/build: Reduce the W=1 warnings noise when compiling x86 syscall tables


 arch/x86/entry/Makefile               |   2 +
 arch/x86/entry/entry_64.S             |  25 ++++--
 arch/x86/include/asm/hardirq.h        |   4 -
 arch/x86/include/asm/pgtable_64.h     |   4 +-
 arch/x86/include/asm/realmode.h       |  10 ++-
 arch/x86/include/asm/tlbflush.h       |   7 ++
 arch/x86/include/asm/uv/bios.h        |   5 +-
 arch/x86/kernel/apic/x2apic_cluster.c |  13 +++-
 arch/x86/kernel/apic/x2apic_uv_x.c    |  42 +++++------
 arch/x86/kernel/fpu/xstate.c          | 138 +++++-----------------------------
 arch/x86/kernel/head32.c              |   2 -
 arch/x86/kernel/head64.c              |   1 -
 arch/x86/kernel/irq.c                 |   3 +-
 arch/x86/kernel/setup.c               |  27 ++++---
 arch/x86/lib/kaslr.c                  |   2 +-
 arch/x86/mm/init.c                    |  14 +++-
 arch/x86/mm/kaslr.c                   |   2 +-
 arch/x86/platform/efi/quirks.c        |  21 ++++++
 arch/x86/platform/uv/bios_uv.c        |   8 +-
 arch/x86/realmode/init.c              |  47 +++++++++---
 20 files changed, 182 insertions(+), 195 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index fe91c25092da..77f28ce9c646 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,6 +5,8 @@
 OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
+CFLAGS_syscall_64.o		+= -Wno-override-init
+CFLAGS_syscall_32.o		+= -Wno-override-init
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b846875aeea6..d172c619c449 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -288,11 +288,15 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	jne	opportunistic_sysret_failed
 
 	/*
-	 * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
-	 * restoring TF results in a trap from userspace immediately after
-	 * SYSRET.  This would cause an infinite loop whenever #DB happens
-	 * with register state that satisfies the opportunistic SYSRET
-	 * conditions.  For example, single-stepping this user code:
+	 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+	 * restore RF properly. If the slowpath sets it for whatever reason, we
+	 * need to restore it correctly.
+	 *
+	 * SYSRET can restore TF, but unlike IRET, restoring TF results in a
+	 * trap from userspace immediately after SYSRET.  This would cause an
+	 * infinite loop whenever #DB happens with register state that satisfies
+	 * the opportunistic SYSRET conditions.  For example, single-stepping
+	 * this user code:
 	 *
 	 *           movq	$stuck_here, %rcx
 	 *           pushfq
@@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
 .endm
 #endif
 
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+# define PUSH_SECTION_IRQENTRY	.pushsection .irqentry.text, "ax"
+# define POP_SECTION_IRQENTRY	.popsection
+#else
+# define PUSH_SECTION_IRQENTRY
+# define POP_SECTION_IRQENTRY
+#endif
+
 .macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
 apicinterrupt3 \num \sym \do_sym
 trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
 .endm
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 7178043b0e1d..59405a248fc2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,10 +22,6 @@ typedef struct {
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
-	/*
-	 * irq_tlb_count is double-counted in irq_call_count, so it must be
-	 * subtracted from irq_call_count when displaying irq_call_count
-	 */
 	unsigned int irq_tlb_count;
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e8ec7ae10fa..1cc82ece9ac1 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | OFFSET (14->63) | TYPE (9-13)  |0|X|X|X| X| X|X|X|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
 /* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
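With _PAGE_BIT_PROTNONE aliasing the GLOBAL bit (bit 8), the corrected
macros work out as follows, matching the fixed "TYPE (9-13)" comment:

	SWP_TYPE_FIRST_BIT   = _PAGE_BIT_PROTNONE + 1             = 8 + 1 =  9
	SWP_OFFSET_FIRST_BIT = SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS = 9 + 5 = 14

i.e. the five type bits occupy bits 9..13 and the offset starts at bit
14; the old "+ 1" left bit 14 unused and cost one bit of maximum swap
offset.
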
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 9c6b890d5e7a..b2988c0ed829 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
 extern unsigned char secondary_startup_64[];
 #endif
 
+static inline size_t real_mode_size_needed(void)
+{
+	if (real_mode_header)
+		return 0;	/* already allocated. */
+
+	return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
+}
+
+void set_real_mode_mem(phys_addr_t mem, size_t size);
 void reserve_real_mode(void);
-void setup_real_mode(void);
 
 #endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4e5be94e079a..6fa85944af83 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 static inline void __native_flush_tlb(void)
 {
+	/*
+	 * If current->mm == NULL then we borrow a mm which may change during a
+	 * task switch and therefore we must not be preempted while we write CR3
+	 * back:
+	 */
+	preempt_disable();
 	native_write_cr3(native_read_cr3());
+	preempt_enable();
 }
 
 static inline void __native_flush_tlb_global_irq_disabled(void)
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index c852590254d5..e652a7cc6186 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -79,7 +79,7 @@ struct uv_gam_range_entry {
 	u16	nasid;		/* HNasid */
 	u16	sockid;		/* Socket ID, high bits of APIC ID */
 	u16	pnode;		/* Index to MMR and GRU spaces */
-	u32	pxm;		/* ACPI proximity domain number */
+	u32	unused2;
 	u32	limit;		/* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
 };
 
@@ -88,7 +88,8 @@ struct uv_gam_range_entry {
 #define	UV_SYSTAB_VERSION_UV4		0x400	/* UV4 BIOS base version */
 #define	UV_SYSTAB_VERSION_UV4_1		0x401	/* + gpa_shift */
 #define	UV_SYSTAB_VERSION_UV4_2		0x402	/* + TYPE_NVRAM/WINDOW/MBOX */
-#define	UV_SYSTAB_VERSION_UV4_LATEST	UV_SYSTAB_VERSION_UV4_2
+#define	UV_SYSTAB_VERSION_UV4_3		0x403	/* - GAM Range PXM Value */
+#define	UV_SYSTAB_VERSION_UV4_LATEST	UV_SYSTAB_VERSION_UV4_3
 
 #define	UV_SYSTAB_TYPE_UNUSED		0	/* End of table (offset == 0) */
 #define	UV_SYSTAB_TYPE_GAM_PARAMS	1	/* GAM PARAM conversions */
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6368fa69d2af..54f35d988025 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
 /*
  * At CPU state changes, update the x2apic cluster sibling info.
  */
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
 {
 	if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
 		return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
 	return 0;
 }
 
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
 {
 	int cpu;
 
@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
 static int x2apic_cluster_probe(void)
 {
 	int cpu = smp_processor_id();
+	int ret;
 
 	if (!x2apic_mode)
 		return 0;
 
+	ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+				x2apic_prepare_cpu, x2apic_dead_cpu);
+	if (ret < 0) {
+		pr_err("Failed to register X2APIC_PREPARE\n");
+		return 0;
+	}
 	cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
-	cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
-			  x2apic_prepare_cpu, x2apic_dead_cpu);
 	return 1;
 }
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 09b59adaea3f..cb0673c1e940 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	if (strncmp(oem_id, "SGI", 3) != 0)
 		return 0;
 
+	if (numa_off) {
+		pr_err("UV: NUMA is off, disabling UV support\n");
+		return 0;
+	}
+
 	/* Setup early hub type field in uv_hub_info for Node 0 */
 	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
 
@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
 	struct uv_gam_range_entry *gre = uv_gre_table;
 	struct uv_gam_range_s *grt;
 	unsigned long last_limit = 0, ram_limit = 0;
-	int bytes, i, sid, lsid = -1;
+	int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
 
 	if (!gre)
 		return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
 		}
 		sid = gre->sockid - _min_socket;
 		if (lsid < sid) {		/* new range */
-			grt = &_gr_table[sid];
-			grt->base = lsid;
+			grt = &_gr_table[indx];
+			grt->base = lindx;
 			grt->nasid = gre->nasid;
 			grt->limit = last_limit = gre->limit;
 			lsid = sid;
+			lindx = indx++;
 			continue;
 		}
 		if (lsid == sid && !ram_limit) {	/* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
 		}
 		if (!ram_limit) {		/* non-contiguous ram range */
 			grt++;
-			grt->base = sid - 1;
+			grt->base = lindx;
 			grt->nasid = gre->nasid;
 			grt->limit = last_limit = gre->limit;
 			continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
 		if (!index) {
 			pr_info("UV: GAM Range Table...\n");
-			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n",
 				"Range", "", "Size", "Type", "NASID",
-				"SID", "PN", "PXM");
+				"SID", "PN");
 		}
 		pr_info(
-		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x %3d\n",
+		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
 			index++,
 			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
 			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
 			((unsigned long)(gre->limit - lgre)) >>
 				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
-			gre->type, gre->nasid, gre->sockid,
-			gre->pnode, gre->pxm);
+			gre->type, gre->nasid, gre->sockid, gre->pnode);
 
 		lgre = gre->limit;
 		if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
 		_pnode_to_socket[i] = SOCK_EMPTY;
 
 	/* fill in pnode/node/addr conversion list values */
-	pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+	pr_info("UV: GAM Building socket/pnode conversion tables\n");
 	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
 		if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
 			continue;
@@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
 		if (_socket_to_pnode[i] != SOCK_EMPTY)
 			continue;	/* duplicate */
 		_socket_to_pnode[i] = gre->pnode;
-		_socket_to_node[i] = gre->pxm;
 
 		i = gre->pnode - minpnode;
 		_pnode_to_socket[i] = gre->sockid;
 
 		pr_info(
-		"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+		"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
 			gre->sockid, gre->type, gre->nasid,
 			_socket_to_pnode[gre->sockid - minsock],
-			_socket_to_node[gre->sockid - minsock],
 			_pnode_to_socket[gre->pnode - minpnode]);
 	}
 
-	/* check socket -> node values */
+	/* Set socket -> node values */
 	lnid = -1;
 	for_each_present_cpu(cpu) {
 		int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
 		lnid = nid;
 		apicid = per_cpu(x86_cpu_to_apicid, cpu);
 		sockid = apicid >> uv_cpuid.socketid_shift;
-		i = sockid - minsock;
-
-		if (nid != _socket_to_node[i]) {
-			pr_warn(
-			"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
-				i, sockid, gre->type, _socket_to_node[i], nid);
-			_socket_to_node[i] = nid;
-		}
+		_socket_to_node[sockid - minsock] = nid;
+		pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+			sockid, apicid, nid);
 	}
 
 	/* Setup physical blade to pnode translation from GAM Range Table */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 680049aa4593..01567aa87503 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
 	return get_xsave_addr(&fpu->state.xsave, xsave_state);
 }
 
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state".  This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
-		int xstate_feature_mask)
-{
-	xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- *	@xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- *	XFEATURE_MASK_SSE, etc...)
- *	@xsave_state_ptr: a pointer to a copy of the state that you would
- *	like written in to the current task's FPU xsave state.  This pointer
- *	must not be located in the current tasks's xsave area.
- * Output:
- *	address of the state in the xsave area or NULL if the state
- *	is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
-		void *xstate_feature_src, size_t len)
-{
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
-	struct fpu *fpu = &current->thread.fpu;
-	void *dst;
-
-	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
-		WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
-		return;
-	}
-
-	/*
-	 * Tell the FPU code that we need the FPU state to be in
-	 * 'fpu' (not in the registers), and that we need it to
-	 * be stable while we write to it.
-	 */
-	fpu__current_fpstate_write_begin();
-
-	/*
-	 * This method *WILL* *NOT* work for compact-format
-	 * buffers.  If the 'xstate_feature_mask' is unset in
-	 * xcomp_bv then we may need to move other feature state
-	 * "up" in the buffer.
-	 */
-	if (xsave->header.xcomp_bv & xstate_feature_mask) {
-		WARN_ON_ONCE(1);
-		goto out;
-	}
-
-	/* find the location in the xsave buffer of the desired state */
-	dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
-	/*
-	 * Make sure that the pointer being passed in did not
-	 * come from the xsave buffer itself.
-	 */
-	WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
-	/* put the caller-provided data in the location */
-	memcpy(dst, xstate_feature_src, len);
-
-	/*
-	 * Mark the xfeature so that the CPU knows there is state
-	 * in the buffer now.
-	 */
-	fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
-	/*
-	 * We are done writing to the 'fpu'.  Reenable preeption
-	 * and (possibly) move the fpstate back in to the fpregs.
-	 */
-	fpu__current_fpstate_write_end();
-}
-
 #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
 #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
 
 /*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory.  We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify PKRU register to set the access
+ * rights for @pkey to @init_val.
  */
 int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 		unsigned long init_val)
 {
-	struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
-	struct pkru_state *old_pkru_state;
-	struct pkru_state new_pkru_state;
+	u32 old_pkru;
 	int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
 	u32 new_pkru_bits = 0;
 
@@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	 */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return -EINVAL;
+	/*
+	 * For most XSAVE components, this would be an arduous task:
+	 * bringing fpstate up to date with fpregs, updating fpstate,
+	 * then re-populating fpregs.  But, for components that are
+	 * never lazily managed, we can just access the fpregs
+	 * directly.  PKRU is never managed lazily, so we can just
+	 * manipulate it directly.  Make sure it stays that way.
+	 */
+	WARN_ON_ONCE(!use_eager_fpu());
 
 	/* Set the bits we need in PKRU:  */
 	if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	/* Shift the bits in to the correct place in PKRU for pkey: */
 	new_pkru_bits <<= pkey_shift;
 
-	/* Locate old copy of the state in the xsave buffer: */
-	old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
-
-	/*
-	 * When state is not in the buffer, it is in the init
-	 * state, set it manually.  Otherwise, copy out the old
-	 * state.
-	 */
-	if (!old_pkru_state)
-		new_pkru_state.pkru = 0;
-	else
-		new_pkru_state.pkru = old_pkru_state->pkru;
-
-	/* Mask off any old bits in place: */
-	new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
-	/* Set the newly-requested bits: */
-	new_pkru_state.pkru |= new_pkru_bits;
-
-	/*
-	 * We could theoretically live without zeroing pkru.pad.
-	 * The current XSAVE feature state definition says that
-	 * only bytes 0->3 are used.  But we do not want to
-	 * chance leaking kernel stack out to userspace in case a
-	 * memcpy() of the whole xsave buffer was done.
-	 *
-	 * They're in the same cacheline anyway.
-	 */
-	new_pkru_state.pad = 0;
+	/* Get old PKRU and mask off any old bits in place: */
+	old_pkru = read_pkru();
+	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
 
-	fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+	/* Write old part along with new part: */
+	write_pkru(old_pkru | new_pkru_bits);
 
 	return 0;
 }
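
The replacement logic is a plain read-modify-write and can be modeled in
user space. A minimal runnable sketch of the masking arithmetic, with PKRU
modeled as a plain variable and the bit values taken from the kernel
headers:

	#include <stdio.h>

	#define PKRU_BITS_PER_PKEY	2
	#define PKRU_AD_BIT		0x1	/* access-disable */
	#define PKRU_WD_BIT		0x2	/* write-disable */

	static unsigned int pkru;	/* stands in for the PKRU register */

	static void set_pkey_bits(int pkey, unsigned int new_bits)
	{
		int pkey_shift = pkey * PKRU_BITS_PER_PKEY;

		/* mask off any old bits in place, then write the new part */
		pkru &= ~((PKRU_AD_BIT | PKRU_WD_BIT) << pkey_shift);
		pkru |= new_bits << pkey_shift;
	}

	int main(void)
	{
		set_pkey_bits(3, PKRU_WD_BIT);
		printf("pkru = 0x%x\n", pkru);	/* 0x80: WD set for pkey 3 */
		return 0;
	}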
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2dda0bc4576e..f16c55bfc090 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
 	/* Initialize 32bit specific setup functions */
 	x86_init.resources.reserve_resources = i386_reserve_resources;
 	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
-	reserve_bios_regions();
 }
 
 asmlinkage __visible void __init i386_start_kernel(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 99d48e7d2974..54a2372f5dbb 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
 		copy_bootdata(__va(real_mode_data));
 
 	x86_early_init_platform_quirks();
-	reserve_bios_regions();
 
 	switch (boot_params.hdr.hardware_subarch) {
 	case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 61521dc19c10..9f669fdd2010 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_puts(p, "  Rescheduling interrupts\n");
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
-					irq_stats(j)->irq_tlb_count);
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
 	seq_puts(p, "  Function call interrupts\n");
 	seq_printf(p, "%*s: ", prec, "TLB");
 	for_each_online_cpu(j)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 991b77986d57..0fa60f5f5a16 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_init.oem.arch_setup();
 
-	kernel_randomize_memory();
-
 	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
 	setup_memory_map();
 	parse_setup_data();
@@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
 
 	max_possible_pfn = max_pfn;
 
+	/*
+	 * Define random base addresses for memory sections after max_pfn is
+	 * defined and before each memory section base is used.
+	 */
+	kernel_randomize_memory();
+
 #ifdef CONFIG_X86_32
 	/* max_low_pfn get updated here */
 	find_low_pfn_range();
@@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
 		efi_find_mirror();
 	}
 
+	reserve_bios_regions();
+
 	/*
 	 * The EFI specification says that boot service code won't be called
 	 * after ExitBootServices(). This is, in fact, a lie.
@@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
 
 	early_trap_pf_init();
 
-	setup_real_mode();
+	/*
+	 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+	 * with the current CR4 value.  This may not be necessary, but
+	 * auditing all the early-boot CR4 manipulation would be needed to
+	 * rule it out.
+	 */
+	if (boot_cpu_data.cpuid_level >= 0)
+		/* A CPU has %cr4 if and only if it has CPUID. */
+		mmu_cr4_features = __read_cr4();
 
 	memblock_set_current_limit(get_max_mapped());
 
@@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
 
 	kasan_init();
 
-	if (boot_cpu_data.cpuid_level >= 0) {
-		/* A CPU has %cr4 if and only if it has CPUID */
-		mmu_cr4_features = __read_cr4();
-		if (trampoline_cr4_features)
-			*trampoline_cr4_features = mmu_cr4_features;
-	}
-
 #ifdef CONFIG_X86_32
 	/* sync back kernel address range */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index f7dfeda83e5c..121f59c6ee54 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -19,7 +19,7 @@
 #include <asm/cpufeature.h>
 #include <asm/setup.h>
 
-#define debug_putstr(v) early_printk(v)
+#define debug_putstr(v) early_printk("%s", v)
 #define has_cpuflag(f) boot_cpu_has(f)
 #define get_boot_seed() kaslr_offset()
 #endif
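
The one-liner above fixes a classic format-string bug: the debug string was
passed as the format argument, so any '%' in it would be parsed as a
conversion specifier. A runnable user-space sketch of the bug class (the
string content is made up):

	#include <stdio.h>

	int main(void)
	{
		const char *v = "progress: 100%";

		/* printf(v); would try to interpret the '%' as a conversion */
		printf("%s", v);	/* fixed form: the string is data */
		return 0;
	}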
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 620928903be3..d28a2d741f9e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -122,8 +122,18 @@ __ref void *alloc_low_pages(unsigned int num)
 	return __va(pfn << PAGE_SHIFT);
 }
 
-/* need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS */
-#define INIT_PGT_BUF_SIZE	(6 * PAGE_SIZE)
+/*
+ * By default we need 3 4k pages for the initial PMD_SIZE and 3 4k pages
+ * for 0-ISA_END_ADDRESS.  Depending on the machine's e820 memory map and
+ * the PUD alignment, we may need twice as many pages when KASLR memory
+ * randomization is enabled.
+ */
+#ifndef CONFIG_RANDOMIZE_MEMORY
+#define INIT_PGD_PAGE_COUNT      6
+#else
+#define INIT_PGD_PAGE_COUNT      12
+#endif
+#define INIT_PGT_BUF_SIZE	(INIT_PGD_PAGE_COUNT * PAGE_SIZE)
 RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
 void  __init early_alloc_pgt_buf(void)
 {
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 26dccd6c0df1..ec8654f117d8 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -97,7 +97,7 @@ void __init kernel_randomize_memory(void)
 	 * add padding if needed (especially for memory hotplug support).
 	 */
 	BUG_ON(kaslr_regions[0].base != &page_offset_base);
-	memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
+	memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
 		CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
 
 	/* Adapt physical memory region size based on available memory */
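
The rounding change matters whenever the top of RAM is not TB-aligned: a
plain shift rounds the size *down* to whole terabytes, so the partial
terabyte above the last boundary would be left out of the randomization
budget. A runnable sketch, assuming the usual x86-64 values PAGE_SHIFT=12
and TB_SHIFT=40 and an LP64 unsigned long:

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define TB_SHIFT	40
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		/* 3 TB of RAM plus one extra page */
		unsigned long max_pfn = (3UL << (TB_SHIFT - PAGE_SHIFT)) + 1;
		unsigned long bytes = max_pfn << PAGE_SHIFT;

		printf("shift:        %lu TB\n", bytes >> TB_SHIFT);	/* 3: truncates */
		printf("DIV_ROUND_UP: %lu TB\n",
		       DIV_ROUND_UP(bytes, 1UL << TB_SHIFT));		/* 4: rounds up */
		return 0;
	}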
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 4480c06cade7..89d1146f5a6f 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -254,6 +254,7 @@ void __init efi_free_boot_services(void)
 	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+		size_t rm_size;
 
 		if (md->type != EFI_BOOT_SERVICES_CODE &&
 		    md->type != EFI_BOOT_SERVICES_DATA)
@@ -263,6 +264,26 @@ void __init efi_free_boot_services(void)
 		if (md->attribute & EFI_MEMORY_RUNTIME)
 			continue;
 
+		/*
+		 * Nasty quirk: if all sub-1MB memory is used for boot
+		 * services, we can get here without having allocated the
+		 * real mode trampoline.  It's too late to hand boot services
+		 * memory back to the memblock allocator, so instead
+		 * try to manually allocate the trampoline if needed.
+		 *
+		 * I've seen this on a Dell XPS 13 9350 with firmware
+		 * 1.4.4 with SGX enabled booting Linux via Fedora 24's
+		 * grub2-efi on a hard disk.  (And no, I don't know why
+		 * this happened, but Linux should still try to boot rather
+		 * than panicking early.)
+		 */
+		rm_size = real_mode_size_needed();
+		if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
+			set_real_mode_mem(start, rm_size);
+			start += rm_size;
+			size -= rm_size;
+		}
+
 		free_bootmem_late(start, size);
 	}
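
The carve-out itself is simple interval arithmetic: take rm_size bytes off
the front of the region being freed when the region starts below 1MB and is
large enough, and hand the remainder back as before. A runnable sketch with
made-up region numbers:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long start = 0x10000, size = 0x40000;	/* freed region */
		unsigned long long rm_size = 0x9000;			/* assumed need */

		if (rm_size && (start + rm_size) < (1 << 20) && size >= rm_size) {
			printf("trampoline at 0x%llx, %llu bytes\n", start, rm_size);
			start += rm_size;
			size -= rm_size;
		}
		printf("freed: 0x%llx, 0x%llx bytes\n", start, size);
		return 0;
	}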
 
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 66b2166ea4a1..4e9fd1378aec 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -199,12 +199,14 @@ void uv_bios_init(void)
 		return;
 	}
 
+	/* Starting with UV4 the UV systab size is variable */
 	if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+		int size = uv_systab->size;
+
 		iounmap(uv_systab);
-		uv_systab = ioremap(efi.uv_systab, uv_systab->size);
+		uv_systab = ioremap(efi.uv_systab, size);
 		if (!uv_systab) {
-			pr_err("UV: UVsystab: ioremap(%d) failed!\n",
-				uv_systab->size);
+			pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
 			return;
 		}
 	}
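
The bios_uv.c hunk fixes a use-after-unmap: the old code passed
uv_systab->size to ioremap() after uv_systab had already been iounmap()ed.
A user-space analogue of the pattern and the fix, with malloc()/free()
standing in for ioremap()/iounmap():

	#include <stdio.h>
	#include <stdlib.h>

	struct systab { int size; };

	int main(void)
	{
		struct systab *tab = malloc(sizeof(*tab));

		if (!tab)
			return 1;
		tab->size = 4096;

		int size = tab->size;	/* copy out before the unmap: the fix */
		free(tab);		/* plays the role of iounmap() */
		/* reading tab->size here would be a use-after-free */
		printf("remapping %d bytes\n", size);
		return 0;
	}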
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 705e3fffb4a1..5db706f14111 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,9 +1,11 @@
 #include <linux/io.h>
+#include <linux/slab.h>
 #include <linux/memblock.h>
 
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/realmode.h>
+#include <asm/tlbflush.h>
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
@@ -11,25 +13,37 @@ u32 *trampoline_cr4_features;
 /* Hold the pgd entry used on booting additional CPUs */
 pgd_t trampoline_pgd_entry;
 
+void __init set_real_mode_mem(phys_addr_t mem, size_t size)
+{
+	void *base = __va(mem);
+
+	real_mode_header = (struct real_mode_header *) base;
+	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
+	       base, (unsigned long long)mem, size);
+}
+
 void __init reserve_real_mode(void)
 {
 	phys_addr_t mem;
-	unsigned char *base;
-	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+	size_t size = real_mode_size_needed();
+
+	if (!size)
+		return;
+
+	WARN_ON(slab_is_available());
 
 	/* Has to be under 1M so we can execute real-mode AP code. */
 	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
-	if (!mem)
-		panic("Cannot allocate trampoline\n");
+	if (!mem) {
+		pr_info("No sub-1M memory is available for the trampoline\n");
+		return;
+	}
 
-	base = __va(mem);
 	memblock_reserve(mem, size);
-	real_mode_header = (struct real_mode_header *) base;
-	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
-	       base, (unsigned long long)mem, size);
+	set_real_mode_mem(mem, size);
 }
 
-void __init setup_real_mode(void)
+static void __init setup_real_mode(void)
 {
 	u16 real_mode_seg;
 	const u32 *rel;
@@ -84,7 +98,7 @@ void __init setup_real_mode(void)
 
 	trampoline_header->start = (u64) secondary_startup_64;
 	trampoline_cr4_features = &trampoline_header->cr4;
-	*trampoline_cr4_features = __read_cr4();
+	*trampoline_cr4_features = mmu_cr4_features;
 
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
 	trampoline_pgd[0] = trampoline_pgd_entry.pgd;
@@ -100,7 +114,7 @@ void __init setup_real_mode(void)
  * need to mark it executable at do_pre_smp_initcalls() at least,
  * thus run it as an early_initcall().
  */
-static int __init set_real_mode_permissions(void)
+static void __init set_real_mode_permissions(void)
 {
 	unsigned char *base = (unsigned char *) real_mode_header;
 	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
@@ -119,7 +133,16 @@ static int __init set_real_mode_permissions(void)
 	set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
 	set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
 	set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
+}
+
+static int __init init_real_mode(void)
+{
+	if (!real_mode_header)
+		panic("Real mode trampoline was not allocated");
+
+	setup_real_mode();
+	set_real_mode_permissions();
 
 	return 0;
 }
-early_initcall(set_real_mode_permissions);
+early_initcall(init_real_mode);

* [GIT PULL] x86 fixes
@ 2016-08-06  6:13 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-08-06  6:13 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: f7d665627e103e82d34306c7d3f6f46f387c0d8b x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace

Two fixes and a cleanup-fix, to the syscall entry code and to ptrace.

 Thanks,

	Ingo

------------------>
Andy Lutomirski (2):
      x86/vdso: Error out if the vDSO isn't a valid DSO
      x86/ptrace: Stop setting TS_COMPAT in ptrace code

David Howells (1):
      x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace


 arch/x86/entry/common.c                |  6 +++++-
 arch/x86/entry/syscalls/syscall_32.tbl |  2 +-
 arch/x86/entry/vdso/vdso2c.h           |  6 ++++++
 arch/x86/include/asm/syscall.h         |  5 +----
 arch/x86/include/asm/thread_info.h     |  3 +++
 arch/x86/kernel/ptrace.c               | 15 +++++++++------
 arch/x86/kernel/signal.c               | 26 ++++++++++++++++++++++++--
 7 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9e1e27d31c6d..be8c403f8117 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -270,8 +270,12 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	 * handling, because syscall restart has a fixup for compat
 	 * syscalls.  The fixup is exercised by the ptrace_syscall_32
 	 * selftest.
+	 *
+	 * We also need to clear TS_I386_REGS_POKED: the 32-bit tracer
+	 * special case only applies after poking regs and before the
+	 * very next return to user mode.
 	 */
-	ti->status &= ~TS_COMPAT;
+	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
 #endif
 
 	user_enter_irqoff();
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 4cddd17153fb..f848572169ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -294,7 +294,7 @@
 # 285 sys_setaltroot
 286	i386	add_key			sys_add_key
 287	i386	request_key		sys_request_key
-288	i386	keyctl			sys_keyctl
+288	i386	keyctl			sys_keyctl			compat_sys_keyctl
 289	i386	ioprio_set		sys_ioprio_set
 290	i386	ioprio_get		sys_ioprio_get
 291	i386	inotify_init		sys_inotify_init
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 63a03bb91497..4f741192846d 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -22,6 +22,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 
 	ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
 
+	if (hdr->e_type != ET_DYN)
+		fail("input is not a shared object\n");
+
 	/* Walk the segment table. */
 	for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
 		if (GET_LE(&pt[i].p_type) == PT_LOAD) {
@@ -49,6 +52,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 	if (stripped_len < load_size)
 		fail("stripped input is too short\n");
 
+	if (!dyn)
+		fail("input has no PT_DYNAMIC section -- your toolchain is buggy\n");
+
 	/* Walk the dynamic table */
 	for (i = 0; dyn + i < dyn_end &&
 		     GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 999b7cd2e78c..4e23dd15c661 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
 	 * TS_COMPAT is set for 32-bit syscall entries and then
 	 * remains set until we return to user mode.
 	 */
-	if (task_thread_info(task)->status & TS_COMPAT)
+	if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
 		/*
 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
 		 * and will match correctly in comparisons.
@@ -239,9 +239,6 @@ static inline int syscall_get_arch(void)
 	 * TS_COMPAT is set for 32-bit syscall entry and then
 	 * remains set until we return to user mode.
 	 *
-	 * TIF_IA32 tasks should always have TS_COMPAT set at
-	 * system call time.
-	 *
 	 * x32 tasks should be considered AUDIT_ARCH_X86_64.
 	 */
 	if (task_thread_info(current)->status & TS_COMPAT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89bff044a6f5..f856c590c309 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -219,6 +219,9 @@ static inline unsigned long current_stack_pointer(void)
  * have to worry about atomic accesses.
  */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
+#ifdef CONFIG_COMPAT
+#define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
+#endif
 #define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 600edd225e81..f79576a541ff 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -923,15 +923,18 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 
 	case offsetof(struct user32, regs.orig_eax):
 		/*
-		 * A 32-bit debugger setting orig_eax means to restore
-		 * the state of the task restarting a 32-bit syscall.
-		 * Make sure we interpret the -ERESTART* codes correctly
-		 * in case the task is not actually still sitting at the
-		 * exit from a 32-bit syscall with TS_COMPAT still set.
+		 * Warning: bizarre corner case fixup here.  A 32-bit
+		 * debugger setting orig_eax to -1 wants to disable
+		 * syscall restart.  Make sure that the syscall
+		 * restart code sign-extends orig_ax.  Also make sure
+		 * we interpret the -ERESTART* codes correctly if
+		 * loaded into regs->ax in case the task is not
+		 * actually still sitting at the exit from a 32-bit
+		 * syscall with TS_COMPAT still set.
 		 */
 		regs->orig_ax = value;
 		if (syscall_get_nr(child, regs) >= 0)
-			task_thread_info(child)->status |= TS_COMPAT;
+			task_thread_info(child)->status |= TS_I386_REGS_POKED;
 		break;
 
 	case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 22cc2f9f8aec..9747a6358949 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -760,8 +760,30 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-#ifdef CONFIG_X86_64
-	if (in_ia32_syscall())
+	/*
+	 * This function is fundamentally broken as currently
+	 * implemented.
+	 *
+	 * The idea is that we want to trigger a call to the
+	 * restart_block() syscall and that we want in_ia32_syscall(),
+	 * in_x32_syscall(), etc. to match whatever they were in the
+	 * syscall being restarted.  We assume that the syscall
+	 * instruction at (regs->ip - 2) matches whatever syscall
+	 * instruction we used to enter in the first place.
+	 *
+	 * The problem is that we can get here when ptrace pokes
+	 * syscall-like values into regs even if we're not in a syscall
+	 * at all.
+	 *
+	 * For now, we maintain historical behavior and guess based on
+	 * stored state.  We could do better by saving the actual
+	 * syscall arch in restart_block or (with caveats on x32) by
+	 * checking if regs->ip points to 'int $0x80'.  The current
+	 * behavior is incorrect if a tracer has a different bitness
+	 * than the tracee.
+	 */
+#ifdef CONFIG_IA32_EMULATION
+	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI

* [GIT PULL] x86 fixes
@ 2016-07-13 12:54 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-07-13 12:54 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Peter Zijlstra

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: abb2bafd295fe962bbadc329dbfb2146457283ac x86/quirks: Add early quirk to reset Apple AirPort card

This tree contains three commits that fix memory corruption bugs with certain 
Apple AirPort cards, plus a fix for an X86_BUG() ID collision in 
asm/cpufeatures.h.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/bcma/bcma_private.h        # abb2bafd295f: x86/quirks: Add early quirk 
  include/linux/bcma/bcma.h          # abb2bafd295f: x86/quirks: Add early quirk 

 Thanks,

	Ingo

------------------>
Dave Hansen (1):
      x86/cpu: Fix duplicated X86_BUG(9) macro

Lukas Wunner (3):
      x86/quirks: Apply nvidia_bugs quirk only on root bus
      x86/quirks: Reintroduce scanning of secondary buses
      x86/quirks: Add early quirk to reset Apple AirPort card


 arch/x86/include/asm/cpufeatures.h |   6 +--
 arch/x86/kernel/early-quirks.c     | 105 ++++++++++++++++++++++++++++++++-----
 drivers/bcma/bcma_private.h        |   2 -
 include/linux/bcma/bcma.h          |   1 +
 4 files changed, 95 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4a413485f9eb..c64b1e9c5d1a 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -301,10 +301,6 @@
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG	X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE	X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
 #ifdef CONFIG_X86_32
 /*
  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
@@ -312,5 +308,7 @@
  */
 #define X86_BUG_ESPFIX		X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
 #endif
+#define X86_BUG_NULL_SEG	X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
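
The collision is easy to reproduce with the preprocessor: before this fix,
32-bit kernels defined X86_BUG_ESPFIX as X86_BUG(9) on top of
X86_BUG_NULL_SEG, so testing one bug bit really tested the other. A
runnable sketch; the NCAPINTS value is an assumption for illustration:

	#include <stdio.h>

	#define NCAPINTS	18	/* assumed number of capability words */
	#define X86_BUG(x)	(NCAPINTS*32 + (x))

	#define X86_BUG_NULL_SEG	X86_BUG(9)
	#define X86_BUG_ESPFIX		X86_BUG(9)	/* pre-fix 32-bit value */

	int main(void)
	{
		printf("NULL_SEG=%d ESPFIX=%d -> %s\n",
		       X86_BUG_NULL_SEG, X86_BUG_ESPFIX,
		       X86_BUG_NULL_SEG == X86_BUG_ESPFIX ?
				"collision" : "distinct");
		return 0;
	}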
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index bca14c899137..57b71373bae3 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -11,7 +11,11 @@
 
 #include <linux/pci.h>
 #include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
 #include <linux/pci_ids.h>
+#include <linux/bcma/bcma.h>
+#include <linux/bcma/bcma_regs.h>
 #include <drm/i915_drm.h>
 #include <asm/pci-direct.h>
 #include <asm/dma.h>
@@ -21,6 +25,9 @@
 #include <asm/iommu.h>
 #include <asm/gart.h>
 #include <asm/irq_remapping.h>
+#include <asm/early_ioremap.h>
+
+#define dev_err(msg)  pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg)
 
 static void __init fix_hypertransport_config(int num, int slot, int func)
 {
@@ -76,6 +83,13 @@ static void __init nvidia_bugs(int num, int slot, int func)
 #ifdef CONFIG_ACPI
 #ifdef CONFIG_X86_IO_APIC
 	/*
+	 * Only applies to Nvidia root ports (bus 0) and not to
+	 * Nvidia graphics cards with PCI ports on secondary buses.
+	 */
+	if (num)
+		return;
+
+	/*
 	 * All timer overrides on Nvidia are
 	 * wrong unless HPET is enabled.
 	 * Unfortunately that's not true on many Asus boards.
@@ -590,6 +604,61 @@ static void __init force_disable_hpet(int num, int slot, int func)
 #endif
 }
 
+#define BCM4331_MMIO_SIZE	16384
+#define BCM4331_PM_CAP		0x40
+#define bcma_aread32(reg)	ioread32(mmio + 1 * BCMA_CORE_SIZE + reg)
+#define bcma_awrite32(reg, val)	iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg)
+
+static void __init apple_airport_reset(int bus, int slot, int func)
+{
+	void __iomem *mmio;
+	u16 pmcsr;
+	u64 addr;
+	int i;
+
+	if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc."))
+		return;
+
+	/* Card may have been put into PCI_D3hot by grub quirk */
+	pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
+
+	if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
+		pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+		write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr);
+		mdelay(10);
+
+		pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
+		if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
+			dev_err("Cannot power up Apple AirPort card\n");
+			return;
+		}
+	}
+
+	addr  =      read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
+	addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32;
+	addr &= PCI_BASE_ADDRESS_MEM_MASK;
+
+	mmio = early_ioremap(addr, BCM4331_MMIO_SIZE);
+	if (!mmio) {
+		dev_err("Cannot iomap Apple AirPort card\n");
+		return;
+	}
+
+	pr_info("Resetting Apple AirPort card (left enabled by EFI)\n");
+
+	for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++)
+		udelay(10);
+
+	bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET);
+	bcma_aread32(BCMA_RESET_CTL);
+	udelay(1);
+
+	bcma_awrite32(BCMA_RESET_CTL, 0);
+	bcma_aread32(BCMA_RESET_CTL);
+	udelay(10);
+
+	early_iounmap(mmio, BCM4331_MMIO_SIZE);
+}
 
 #define QFLAG_APPLY_ONCE 	0x1
 #define QFLAG_APPLIED		0x2
@@ -603,12 +672,6 @@ struct chipset {
 	void (*f)(int num, int slot, int func);
 };
 
-/*
- * Only works for devices on the root bus. If you add any devices
- * not on bus 0 readd another loop level in early_quirks(). But
- * be careful because at least the Nvidia quirk here relies on
- * only matching on bus 0.
- */
 static struct chipset early_qrk[] __initdata = {
 	{ PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
 	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
@@ -638,9 +701,13 @@ static struct chipset early_qrk[] __initdata = {
 	 */
 	{ PCI_VENDOR_ID_INTEL, 0x0f00,
 		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+	{ PCI_VENDOR_ID_BROADCOM, 0x4331,
+	  PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
 	{}
 };
 
+static void __init early_pci_scan_bus(int bus);
+
 /**
  * check_dev_quirk - apply early quirks to a given PCI device
  * @num: bus number
@@ -649,7 +716,7 @@ static struct chipset early_qrk[] __initdata = {
  *
  * Check the vendor & device ID against the early quirks table.
  *
- * If the device is single function, let early_quirks() know so we don't
+ * If the device is single function, let early_pci_scan_bus() know so we don't
  * poke at this device again.
  */
 static int __init check_dev_quirk(int num, int slot, int func)
@@ -658,6 +725,7 @@ static int __init check_dev_quirk(int num, int slot, int func)
 	u16 vendor;
 	u16 device;
 	u8 type;
+	u8 sec;
 	int i;
 
 	class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
@@ -685,25 +753,36 @@ static int __init check_dev_quirk(int num, int slot, int func)
 
 	type = read_pci_config_byte(num, slot, func,
 				    PCI_HEADER_TYPE);
+
+	if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) {
+		sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS);
+		if (sec > num)
+			early_pci_scan_bus(sec);
+	}
+
 	if (!(type & 0x80))
 		return -1;
 
 	return 0;
 }
 
-void __init early_quirks(void)
+static void __init early_pci_scan_bus(int bus)
 {
 	int slot, func;
 
-	if (!early_pci_allowed())
-		return;
-
 	/* Poor man's PCI discovery */
-	/* Only scan the root bus */
 	for (slot = 0; slot < 32; slot++)
 		for (func = 0; func < 8; func++) {
 			/* Only probe function 0 on single fn devices */
-			if (check_dev_quirk(0, slot, func))
+			if (check_dev_quirk(bus, slot, func))
 				break;
 		}
 }
+
+void __init early_quirks(void)
+{
+	if (!early_pci_allowed())
+		return;
+
+	early_pci_scan_bus(0);
+}
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index eda09090cb52..f642c4264c27 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -8,8 +8,6 @@
 #include <linux/bcma/bcma.h>
 #include <linux/delay.h>
 
-#define BCMA_CORE_SIZE		0x1000
-
 #define bcma_err(bus, fmt, ...) \
 	pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__)
 #define bcma_warn(bus, fmt, ...) \
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index e6b41f42602b..3db25df396cb 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -159,6 +159,7 @@ struct bcma_host_ops {
 #define BCMA_CORE_DEFAULT		0xFFF
 
 #define BCMA_MAX_NR_CORES		16
+#define BCMA_CORE_SIZE			0x1000
 
 /* Chip IDs of PCIe devices */
 #define BCMA_CHIP_ID_BCM4313	0x4313

* [GIT PULL] x86 fixes
@ 2016-07-08 14:00 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-07-08 14:00 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: c76a093dc1415d364020b8b33f1e194ef4d26fd0 x86/Documentation: Fix various typos in Documentation/x86/ files

Three fixes:

- A boot crash fix with certain configs
- A MAINTAINERS entry update
- Documentation typo fixes

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/amd_nb: Fix boot crash on non-AMD systems

Jon Mason (1):
      MAINTAINERS: Update the Calgary IOMMU entry

Masanari Iida (1):
      x86/Documentation: Fix various typos in Documentation/x86/ files


 Documentation/x86/intel_mpx.txt       | 6 +++---
 Documentation/x86/tlb.txt             | 4 ++--
 Documentation/x86/x86_64/machinecheck | 2 +-
 MAINTAINERS                           | 6 +++---
 arch/x86/kernel/amd_nb.c              | 4 ++--
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt
index 1a5a12184a35..85d0549ad846 100644
--- a/Documentation/x86/intel_mpx.txt
+++ b/Documentation/x86/intel_mpx.txt
@@ -45,7 +45,7 @@ is how we expect the compiler, application and kernel to work together.
    MPX-instrumented.
 3) The kernel detects that the CPU has MPX, allows the new prctl() to
    succeed, and notes the location of the bounds directory. Userspace is
-   expected to keep the bounds directory at that locationWe note it
+   expected to keep the bounds directory at that location. We note it
    instead of reading it each time because the 'xsave' operation needed
    to access the bounds directory register is an expensive operation.
 4) If the application needs to spill bounds out of the 4 registers, it
@@ -167,7 +167,7 @@ If a #BR is generated due to a bounds violation caused by MPX.
 We need to decode MPX instructions to get violation address and
 set this address into extended struct siginfo.
 
-The _sigfault feild of struct siginfo is extended as follow:
+The _sigfault field of struct siginfo is extended as follow:
 
 87		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
 88		struct {
@@ -240,5 +240,5 @@ them at the same bounds table.
 This is allowed architecturally.  See more information "Intel(R) Architecture
 Instruction Set Extensions Programming Reference" (9.3.4).
 
-However, if users did this, the kernel might be fooled in to unmaping an
+However, if users did this, the kernel might be fooled in to unmapping an
 in-use bounds table since it does not recognize sharing.
diff --git a/Documentation/x86/tlb.txt b/Documentation/x86/tlb.txt
index 39d172326703..6a0607b99ed8 100644
--- a/Documentation/x86/tlb.txt
+++ b/Documentation/x86/tlb.txt
@@ -5,7 +5,7 @@ When the kernel unmaps or modified the attributes of a range of
     from areas other than the one we are trying to flush will be
     destroyed and must be refilled later, at some cost.
  2. Use the invlpg instruction to invalidate a single page at a
-    time.  This could potentialy cost many more instructions, but
+    time.  This could potentially cost many more instructions, but
     it is a much more precise operation, causing no collateral
     damage to other TLB entries.
 
@@ -19,7 +19,7 @@ When the kernel unmaps or modified the attributes of a range of
     work.
  3. The size of the TLB.  The larger the TLB, the more collateral
     damage we do with a full flush.  So, the larger the TLB, the
-    more attrative an individual flush looks.  Data and
+    more attractive an individual flush looks.  Data and
     instructions have separate TLBs, as do different page sizes.
  4. The microarchitecture.  The TLB has become a multi-level
     cache on modern CPUs, and the global flushes have become more
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
index b1fb30273286..d0648a74fceb 100644
--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
@@ -36,7 +36,7 @@ between all CPUs.
 
 check_interval
 	How often to poll for corrected machine check errors, in seconds
-	(Note output is hexademical). Default 5 minutes.  When the poller
+	(Note output is hexadecimal). Default 5 minutes.  When the poller
 	finds MCEs it triggers an exponential speedup (poll more often) on
 	the polling interval.  When the poller stops finding MCEs, it
 	triggers an exponential backoff (poll less often) on the polling
diff --git a/MAINTAINERS b/MAINTAINERS
index 16700e4fcc4a..f589a9d0fb87 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2773,9 +2773,9 @@ F:	include/net/caif/
 F:	net/caif/
 
 CALGARY x86-64 IOMMU
-M:	Muli Ben-Yehuda <muli@il.ibm.com>
-M:	"Jon D. Mason" <jdmason@kudzu.us>
-L:	discuss@x86-64.org
+M:	Muli Ben-Yehuda <mulix@mulix.org>
+M:	Jon Mason <jdmason@kudzu.us>
+L:	iommu@lists.linux-foundation.org
 S:	Maintained
 F:	arch/x86/kernel/pci-calgary_64.c
 F:	arch/x86/kernel/tce_64.c
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index a147e676fc7b..e991d5c8bb3a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -71,8 +71,8 @@ int amd_cache_northbridges(void)
 	while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
 		i++;
 
-	if (i == 0)
-		return 0;
+	if (!i)
+		return -ENODEV;
 
 	nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
 	if (!nb)

* [GIT PULL] x86 fixes
@ 2016-06-10 14:43 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-06-10 14:43 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 9d98bcec731756b8688b59ec998707924d716d7b x86/ioapic: Fix incorrect pointers in ioapic_setup_resources()

Misc fixes from all around the map, plus a commit that introduces a new header of 
Intel model name symbols (unused) that will make the next merge window easier - I 
hope that one is fine.

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/entry/traps: Don't force in_interrupt() to return true in IST handlers

Borislav Petkov (1):
      x86/cpu/AMD: Extend X86_FEATURE_TOPOEXT workaround to newer models

Dave Hansen (1):
      x86/cpu/intel: Introduce macros for Intel family numbers

Dr. David Alan Gilbert (1):
      x86/msr: Use the proper trace point conditional for writes

H. Peter Anvin (1):
      x86, build: copy ldlinux.c32 to image.iso

Rui Wang (1):
      x86/ioapic: Fix incorrect pointers in ioapic_setup_resources()


 arch/x86/boot/Makefile              |  3 ++
 arch/x86/include/asm/intel-family.h | 68 +++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/msr.h          |  4 +--
 arch/x86/kernel/apic/io_apic.c      |  2 +-
 arch/x86/kernel/cpu/amd.c           |  4 +--
 arch/x86/kernel/traps.c             | 20 +++++------
 6 files changed, 86 insertions(+), 15 deletions(-)
 create mode 100644 arch/x86/include/asm/intel-family.h

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 700a9c6e6159..be8e688fa0d4 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -162,6 +162,9 @@ isoimage: $(obj)/bzImage
 	for i in lib lib64 share end ; do \
 		if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
 			cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+			if [ -f /usr/$$i/syslinux/ldlinux.c32 ]; then \
+				cp /usr/$$i/syslinux/ldlinux.c32 $(obj)/isoimage ; \
+			fi ; \
 			break ; \
 		fi ; \
 		if [ $$i = end ] ; then exit 1 ; fi ; \
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
new file mode 100644
index 000000000000..6999f7d01a0d
--- /dev/null
+++ b/arch/x86/include/asm/intel-family.h
@@ -0,0 +1,68 @@
+#ifndef _ASM_X86_INTEL_FAMILY_H
+#define _ASM_X86_INTEL_FAMILY_H
+
+/*
+ * "Big Core" Processors (Branded as Core, Xeon, etc...)
+ *
+ * The "_X" parts are generally the EP and EX Xeons, or the
+ * "Extreme" ones, like Broadwell-E.
+ *
+ * Things ending in "2" are usually because we have no better
+ * name for them.  There's no processor called "WESTMERE2".
+ */
+
+#define INTEL_FAM6_CORE_YONAH		0x0E
+#define INTEL_FAM6_CORE2_MEROM		0x0F
+#define INTEL_FAM6_CORE2_MEROM_L	0x16
+#define INTEL_FAM6_CORE2_PENRYN		0x17
+#define INTEL_FAM6_CORE2_DUNNINGTON	0x1D
+
+#define INTEL_FAM6_NEHALEM		0x1E
+#define INTEL_FAM6_NEHALEM_EP		0x1A
+#define INTEL_FAM6_NEHALEM_EX		0x2E
+#define INTEL_FAM6_WESTMERE		0x25
+#define INTEL_FAM6_WESTMERE2		0x1F
+#define INTEL_FAM6_WESTMERE_EP		0x2C
+#define INTEL_FAM6_WESTMERE_EX		0x2F
+
+#define INTEL_FAM6_SANDYBRIDGE		0x2A
+#define INTEL_FAM6_SANDYBRIDGE_X	0x2D
+#define INTEL_FAM6_IVYBRIDGE		0x3A
+#define INTEL_FAM6_IVYBRIDGE_X		0x3E
+
+#define INTEL_FAM6_HASWELL_CORE		0x3C
+#define INTEL_FAM6_HASWELL_X		0x3F
+#define INTEL_FAM6_HASWELL_ULT		0x45
+#define INTEL_FAM6_HASWELL_GT3E		0x46
+
+#define INTEL_FAM6_BROADWELL_CORE	0x3D
+#define INTEL_FAM6_BROADWELL_XEON_D	0x56
+#define INTEL_FAM6_BROADWELL_GT3E	0x47
+#define INTEL_FAM6_BROADWELL_X		0x4F
+
+#define INTEL_FAM6_SKYLAKE_MOBILE	0x4E
+#define INTEL_FAM6_SKYLAKE_DESKTOP	0x5E
+#define INTEL_FAM6_SKYLAKE_X		0x55
+#define INTEL_FAM6_KABYLAKE_MOBILE	0x8E
+#define INTEL_FAM6_KABYLAKE_DESKTOP	0x9E
+
+/* "Small Core" Processors (Atom) */
+
+#define INTEL_FAM6_ATOM_PINEVIEW	0x1C
+#define INTEL_FAM6_ATOM_LINCROFT	0x26
+#define INTEL_FAM6_ATOM_PENWELL		0x27
+#define INTEL_FAM6_ATOM_CLOVERVIEW	0x35
+#define INTEL_FAM6_ATOM_CEDARVIEW	0x36
+#define INTEL_FAM6_ATOM_SILVERMONT1	0x37 /* BayTrail/BYT / Valleyview */
+#define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
+#define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
+#define INTEL_FAM6_ATOM_MERRIFIELD1	0x4A /* Tangier */
+#define INTEL_FAM6_ATOM_MERRIFIELD2	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_GOLDMONT	0x5C
+#define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
+
+/* Xeon Phi */
+
+#define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
+
+#endif /* _ASM_X86_INTEL_FAMILY_H */
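
The header is unused in this pull, but the intended use is straightforward:
key CPU model checks off named constants instead of bare hex literals. A
runnable sketch with the constants copied from the header above:

	#include <stdio.h>

	#define INTEL_FAM6_SKYLAKE_MOBILE	0x4E
	#define INTEL_FAM6_SKYLAKE_DESKTOP	0x5E

	static const char *model_name(unsigned int model)
	{
		switch (model) {
		case INTEL_FAM6_SKYLAKE_MOBILE:		return "Skylake mobile";
		case INTEL_FAM6_SKYLAKE_DESKTOP:	return "Skylake desktop";
		default:				return "unknown";
		}
	}

	int main(void)
	{
		printf("model 0x5E -> %s\n", model_name(0x5E));
		return 0;
	}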
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 7dc1d8fef7fd..b5fee97813cd 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -122,7 +122,7 @@ notrace static inline void native_write_msr(unsigned int msr,
 		     "2:\n"
 		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
 		     : : "c" (msr), "a"(low), "d" (high) : "memory");
-	if (msr_tracepoint_active(__tracepoint_read_msr))
+	if (msr_tracepoint_active(__tracepoint_write_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
 
@@ -141,7 +141,7 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
 		     : "c" (msr), "0" (low), "d" (high),
 		       [fault] "i" (-EIO)
 		     : "memory");
-	if (msr_tracepoint_active(__tracepoint_read_msr))
+	if (msr_tracepoint_active(__tracepoint_write_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), err);
 	return err;
 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 84e33ff5a6d5..446702ed99dc 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2588,8 +2588,8 @@ static struct resource * __init ioapic_setup_resources(void)
 		res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
 		mem += IOAPIC_RESOURCE_NAME_SIZE;
+		ioapics[i].iomem_res = &res[num];
 		num++;
-		ioapics[i].iomem_res = res;
 	}
 
 	ioapic_resources = res;
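
The bug fixed here is a one-element aliasing error: 'res' is '&res[0]', so
every IOAPIC ended up pointing at the first resource instead of its own
entry. A runnable sketch of the fixed ordering, with stand-in types:

	#include <stdio.h>

	struct resource { int id; };
	struct ioapic	{ struct resource *iomem_res; };

	int main(void)
	{
		struct resource res[3] = { {0}, {1}, {2} };
		struct ioapic ioapics[3];
		int num = 0;

		for (int i = 0; i < 3; i++) {
			/* buggy order was: num++; ioapics[i].iomem_res = res; */
			ioapics[i].iomem_res = &res[num];	/* per-entry */
			num++;
		}
		printf("ioapics[2] -> res[%d]\n", ioapics[2].iomem_res->id);
		return 0;
	}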
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c343a54bed39..f5c69d8974e1 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -674,14 +674,14 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 	u64 value;
 
 	/* re-enable TopologyExtensions if switched off by BIOS */
-	if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
+	if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) &&
 	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {
 
 		if (msr_set_bit(0xc0011005, 54) > 0) {
 			rdmsrl(0xc0011005, value);
 			if (value & BIT_64(54)) {
 				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
-				pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+				pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
 			}
 		}
 	}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d1590486204a..00f03d82e69a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -96,6 +96,12 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
 		local_irq_disable();
 }
 
+/*
+ * In IST context, we explicitly disable preemption.  This serves two
+ * purposes: it makes it much less likely that we would accidentally
+ * schedule in IST context and it will force a warning if we somehow
+ * manage to schedule by accident.
+ */
 void ist_enter(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
@@ -110,13 +116,7 @@ void ist_enter(struct pt_regs *regs)
 		rcu_nmi_enter();
 	}
 
-	/*
-	 * We are atomic because we're on the IST stack; or we're on
-	 * x86_32, in which case we still shouldn't schedule; or we're
-	 * on x86_64 and entered from user mode, in which case we're
-	 * still atomic unless ist_begin_non_atomic is called.
-	 */
-	preempt_count_add(HARDIRQ_OFFSET);
+	preempt_disable();
 
 	/* This code is a bit fragile.  Test it. */
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
@@ -124,7 +124,7 @@ void ist_enter(struct pt_regs *regs)
 
 void ist_exit(struct pt_regs *regs)
 {
-	preempt_count_sub(HARDIRQ_OFFSET);
+	preempt_enable_no_resched();
 
 	if (!user_mode(regs))
 		rcu_nmi_exit();
@@ -155,7 +155,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
 	BUG_ON((unsigned long)(current_top_of_stack() -
 			       current_stack_pointer()) >= THREAD_SIZE);
 
-	preempt_count_sub(HARDIRQ_OFFSET);
+	preempt_enable_no_resched();
 }
 
 /**
@@ -165,7 +165,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
  */
 void ist_end_non_atomic(void)
 {
-	preempt_count_add(HARDIRQ_OFFSET);
+	preempt_disable();
 }
 
 static nokprobe_inline int

^ permalink raw reply	[flat|nested] 505+ messages in thread
* [GIT PULL] x86 fixes
@ 2016-05-25 22:00 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-05-25 22:00 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: dc4fac84f8e66b147921ebdc385e767d5def7422 x86/mm: Switch from TASK_SIZE to TASK_SIZE_MAX in the page fault code

Misc fixes: EFI, entry code, pkeys and MPX fixes, TASK_SIZE cleanups and a tsc 
frequency table fix.

 Thanks,

	Ingo

------------------>
Andy Lutomirski (2):
      x86/fsgsbase/64: Use TASK_SIZE_MAX for FSBASE/GSBASE upper limits
      x86/mm: Switch from TASK_SIZE to TASK_SIZE_MAX in the page fault code

Dave Hansen (2):
      x86/cpufeature, x86/mm/pkeys: Fix broken compile-time disabling of pkeys
      x86/mm/mpx: Work around MPX erratum SKD046

Jeremy Compostella (1):
      x86/tsc: Add missing Cherrytrail frequency to the table

Josh Poimboeuf (1):
      x86/entry/64: Fix stack return address retrieval in thunk

Linus Torvalds (1):
      x86/efi: Fix 7-parameter efi_call()s


 arch/x86/entry/thunk_64.S                | 11 +++++-----
 arch/x86/include/asm/bugs.h              |  8 +++++++
 arch/x86/include/asm/cpufeature.h        | 12 +++++------
 arch/x86/include/asm/disabled-features.h |  6 +++---
 arch/x86/kernel/cpu/common.c             |  7 ++++++
 arch/x86/kernel/cpu/intel.c              | 37 ++++++++++++++++++++++++++++++++
 arch/x86/kernel/process_64.c             |  4 ++--
 arch/x86/kernel/ptrace.c                 |  4 ++--
 arch/x86/kernel/tsc_msr.c                |  3 +++
 arch/x86/mm/fault.c                      | 11 +++++++---
 arch/x86/platform/efi/efi_stub_64.S      |  9 ++++----
 11 files changed, 85 insertions(+), 27 deletions(-)

diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 98df1fa8825c..027aec4a74df 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -8,16 +8,15 @@
 #include <linux/linkage.h>
 #include "calling.h"
 #include <asm/asm.h>
-#include <asm/frame.h>
 
 	/* rdi:	arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func, put_ret_addr_in_rdi=0
 	.globl \name
 	.type \name, @function
 \name:
-	FRAME_BEGIN
+	pushq %rbp
+	movq %rsp, %rbp
 
-	/* this one pushes 9 elems, the next one would be %rIP */
 	pushq %rdi
 	pushq %rsi
 	pushq %rdx
@@ -29,8 +28,8 @@
 	pushq %r11
 
 	.if \put_ret_addr_in_rdi
-	/* 9*8(%rsp) is return addr on stack */
-	movq 9*8(%rsp), %rdi
+	/* 8(%rbp) is return addr on stack */
+	movq 8(%rbp), %rdi
 	.endif
 
 	call \func
@@ -65,7 +64,7 @@
 	popq %rdx
 	popq %rsi
 	popq %rdi
-	FRAME_END
+	popq %rbp
 	ret
 	_ASM_NOKPROBE(restore)
 #endif
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 08abf639075f..5490bbaf71d5 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -1,8 +1,16 @@
 #ifndef _ASM_X86_BUGS_H
 #define _ASM_X86_BUGS_H
 
+#include <asm/processor.h>
+
 extern void check_bugs(void);
 
+#if defined(CONFIG_CPU_SUP_INTEL)
+void check_mpx_erratum(struct cpuinfo_x86 *c);
+#else
+static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {}
+#endif
+
 #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
 int ppro_with_ram_bug(void);
 #else
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 25ebb54905e0..483fb547e3c0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -64,9 +64,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   (((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) ||	\
 	   (((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) ||	\
 	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) ||	\
-	   (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
+	   (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK14)) ||	\
+	   (((bit)>>5)==15 && (1UL<<((bit)&31) & REQUIRED_MASK15)) ||	\
+	   (((bit)>>5)==16 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
 
 #define DISABLED_MASK_BIT_SET(bit)					\
 	 ( (((bit)>>5)==0  && (1UL<<((bit)&31) & DISABLED_MASK0 )) ||	\
@@ -83,9 +83,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   (((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) ||	\
 	   (((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) ||	\
 	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) ||	\
-	   (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
+	   (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK14)) ||	\
+	   (((bit)>>5)==15 && (1UL<<((bit)&31) & DISABLED_MASK15)) ||	\
+	   (((bit)>>5)==16 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
 
 #define cpu_has(c, bit)							\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 39343be7d4f4..911e9358ceb1 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -29,11 +29,11 @@
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU		(1<<(X86_FEATURE_PKU))
-# define DISABLE_OSPKE		(1<<(X86_FEATURE_OSPKE))
-#else
 # define DISABLE_PKU		0
 # define DISABLE_OSPKE		0
+#else
+# define DISABLE_PKU		(1<<(X86_FEATURE_PKU & 31))
+# define DISABLE_OSPKE		(1<<(X86_FEATURE_OSPKE & 31))
 #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
 
 /*
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6ef6ed9ccca6..0fe6953f421c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,7 @@
 #include <asm/mtrr.h>
 #include <linux/numa.h>
 #include <asm/asm.h>
+#include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
@@ -270,6 +271,8 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 static __init int setup_disable_smep(char *arg)
 {
 	setup_clear_cpu_cap(X86_FEATURE_SMEP);
+	/* Check for things that depend on SMEP being enabled: */
+	check_mpx_erratum(&boot_cpu_data);
 	return 1;
 }
 __setup("nosmep", setup_disable_smep);
@@ -310,6 +313,10 @@ static bool pku_disabled;
 
 static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 {
+	/* check the boot processor, plus compile options for PKU: */
+	if (!cpu_feature_enabled(X86_FEATURE_PKU))
+		return;
+	/* checks the actual processor's cpuid bits: */
 	if (!cpu_has(c, X86_FEATURE_PKU))
 		return;
 	if (pku_disabled)
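
A note on the setup_pku() hunk: the two checks are not redundant.
cpu_feature_enabled() folds the compile-time DISABLED_MASK tables into the
test, so PKU support compiled out via Kconfig is rejected at build time,
while cpu_has(c, ...) tests the bit this particular CPU actually reports.
The macro is roughly (abridged sketch of the cpufeature.h definition):

  #define cpu_feature_enabled(bit) \
          (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 \
           : static_cpu_has(bit))

Together with the disabled-features.h hunk above - which now sets DISABLE_PKU
when the Kconfig option is off, as intended - the early return compiles away
entirely on !PKU kernels.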
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8dae51fd3db1..6e2ffbebbcdb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -25,6 +25,41 @@
 #include <asm/apic.h>
 #endif
 
+/*
+ * Just in case our CPU detection goes bad, or you have a weird system,
+ * allow a way to override the automatic disabling of MPX.
+ */
+static int forcempx;
+
+static int __init forcempx_setup(char *__unused)
+{
+	forcempx = 1;
+
+	return 1;
+}
+__setup("intel-skd-046-workaround=disable", forcempx_setup);
+
+void check_mpx_erratum(struct cpuinfo_x86 *c)
+{
+	if (forcempx)
+		return;
+	/*
+	 * Turn off the MPX feature on CPUs where SMEP is not
+	 * available or disabled.
+	 *
+	 * Works around Intel Erratum SKD046: "Branch Instructions
+	 * May Initialize MPX Bound Registers Incorrectly".
+	 *
+	 * This might falsely disable MPX on systems without
+	 * SMEP, like Atom processors without SMEP.  But there
+	 * is no such hardware known at the moment.
+	 */
+	if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) {
+		setup_clear_cpu_cap(X86_FEATURE_MPX);
+		pr_warn("x86/mpx: Disabling MPX since SMEP not present\n");
+	}
+}
+
 static void early_init_intel(struct cpuinfo_x86 *c)
 {
 	u64 misc_enable;
@@ -173,6 +208,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		if (edx & (1U << 28))
 			c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
 	}
+
+	check_mpx_erratum(c);
 }
 
 #ifdef CONFIG_X86_32
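
Two details of the MPX workaround are worth spelling out. First, disabling
SMEP on the command line re-triggers the check: the common.c hunk above makes
"nosmep" call check_mpx_erratum(), so MPX is turned off consistently on that
path too. Second, if the heuristic ever misfires on a (currently hypothetical)
SMEP-less CPU whose MPX is fine, the __setup() string provides an escape
hatch; booting with:

  intel-skd-046-workaround=disable

leaves MPX alone regardless of SMEP state.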
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6b16c36f0939..6e789ca1f841 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -532,7 +532,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 
 	switch (code) {
 	case ARCH_SET_GS:
-		if (addr >= TASK_SIZE_OF(task))
+		if (addr >= TASK_SIZE_MAX)
 			return -EPERM;
 		cpu = get_cpu();
 		task->thread.gsindex = 0;
@@ -546,7 +546,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 	case ARCH_SET_FS:
 		/* Not strictly needed for fs, but do it for symmetry
 		   with gs */
-		if (addr >= TASK_SIZE_OF(task))
+		if (addr >= TASK_SIZE_MAX)
 			return -EPERM;
 		cpu = get_cpu();
 		task->thread.fsindex = 0;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e60ef918f53d..600edd225e81 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -392,7 +392,7 @@ static int putreg(struct task_struct *child,
 
 #ifdef CONFIG_X86_64
 	case offsetof(struct user_regs_struct,fs_base):
-		if (value >= TASK_SIZE_OF(child))
+		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		/*
 		 * When changing the segment base, use do_arch_prctl
@@ -406,7 +406,7 @@ static int putreg(struct task_struct *child,
 		/*
 		 * Exactly the same here as the %fs handling above.
 		 */
-		if (value >= TASK_SIZE_OF(child))
+		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		if (child->thread.gsbase != value)
 			return do_arch_prctl(child, ARCH_SET_GS, value);
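
A note on the TASK_SIZE_OF() -> TASK_SIZE_MAX conversions in these two files:
TASK_SIZE_OF(task) depends on the target task's 32-bit flag, i.e. on mutable
per-task state, whereas TASK_SIZE_MAX is a constant. On x86-64 of this era
the definition was roughly the following (a sketch, quoted from memory of
asm/processor.h - verify against the tree):

  #include <stdio.h>

  #define PAGE_SIZE     4096UL
  #define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)  /* assumed x86-64 form */

  int main(void)
  {
          printf("%#lx\n", TASK_SIZE_MAX);  /* 0x7ffffffff000 */
          return 0;
  }

Using the constant makes the segment-base limit check identical for 32-bit
and 64-bit tracees instead of varying with the tracee's personality.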
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 6aa0f4d9eea6..9911a0620f9a 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -23,6 +23,7 @@
 #include <asm/param.h>
 
 /* CPU reference clock frequency: in KHz */
+#define FREQ_80		80000
 #define FREQ_83		83200
 #define FREQ_100	99840
 #define FREQ_133	133200
@@ -56,6 +57,8 @@ static struct freq_desc freq_desc_tables[] = {
 	{ 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } },
 	/* ANN */
 	{ 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } },
+	/* AIRMONT */
+	{ 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80,	0, 0, 0 } },
 };
 
 static int match_cpu(u8 family, u8 model)
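
The new FREQ_80 constant fills table slot 4, which Airmont parts can report;
previously that slot read as 0 and MSR-based calibration bailed out. The
calibration path ultimately computes reference-clock * ratio; a tiny
standalone sketch with a made-up ratio:

  #include <stdio.h>

  #define FREQ_80 80000   /* kHz, the new table entry above */

  int main(void)
  {
          unsigned int ratio = 20; /* hypothetical, read from MSR_PLATFORM_INFO */

          printf("TSC = %u kHz\n", FREQ_80 * ratio); /* 1600000 kHz = 1.6 GHz */
          return 0;
  }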
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5ce1ed02f7e8..7d1fa7cd2374 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -292,7 +292,7 @@ void vmalloc_sync_all(void)
 		return;
 
 	for (address = VMALLOC_START & PMD_MASK;
-	     address >= TASK_SIZE && address < FIXADDR_TOP;
+	     address >= TASK_SIZE_MAX && address < FIXADDR_TOP;
 	     address += PMD_SIZE) {
 		struct page *page;
 
@@ -854,8 +854,13 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 				return;
 		}
 #endif
-		/* Kernel addresses are always protection faults: */
-		if (address >= TASK_SIZE)
+
+		/*
+		 * To avoid leaking information about the kernel page table
+		 * layout, pretend that user-mode accesses to kernel addresses
+		 * are always protection faults.
+		 */
+		if (address >= TASK_SIZE_MAX)
 			error_code |= PF_PROT;
 
 		if (likely(show_unhandled_signals))
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 92723aeae0f9..cd95075944ab 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -11,7 +11,6 @@
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
 #include <asm/page_types.h>
-#include <asm/frame.h>
 
 #define SAVE_XMM			\
 	mov %rsp, %rax;			\
@@ -40,10 +39,10 @@
 	mov (%rsp), %rsp
 
 ENTRY(efi_call)
-	FRAME_BEGIN
+	pushq %rbp
+	movq %rsp, %rbp
 	SAVE_XMM
-	mov (%rsp), %rax
-	mov 8(%rax), %rax
+	mov 16(%rbp), %rax
 	subq $48, %rsp
 	mov %r9, 32(%rsp)
 	mov %rax, 40(%rsp)
@@ -53,6 +52,6 @@ ENTRY(efi_call)
 	call *%rdi
 	addq $48, %rsp
 	RESTORE_XMM
-	FRAME_END
+	popq %rbp
 	ret
 ENDPROC(efi_call)

* [GIT PULL] x86 fixes
@ 2016-05-10 12:01 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-05-10 12:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 8d415ee225a3d15d3e3029524350e8237a4de7b8 x86/topology: Set x86_max_cores to 1 for CONFIG_SMP=n

Two topology corner case fixes, and a MAINTAINERS file update for mmiotrace 
maintenance.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  MAINTAINERS                        # 4abac0d0bbc6: MAINTAINERS: Add mmiotrace e

 Thanks,

	Ingo

------------------>
Ingo Molnar (1):
      MAINTAINERS: Add mmiotrace entry

Thomas Gleixner (2):
      x86/topology: Handle CPUID bogosity gracefully
      x86/topology: Set x86_max_cores to 1 for CONFIG_SMP=n


 MAINTAINERS                 | 14 ++++++++++++++
 arch/x86/kernel/cpu/intel.c |  2 +-
 arch/x86/kernel/smpboot.c   |  5 +++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a727d9959ecd..9c567a431d8d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11318,6 +11318,20 @@ F:	include/trace/
 F:	kernel/trace/
 F:	tools/testing/selftests/ftrace/
 
+TRACING MMIO ACCESSES (MMIOTRACE)
+M:	Steven Rostedt <rostedt@goodmis.org>
+M:	Ingo Molnar <mingo@kernel.org>
+R:	Karol Herbst <karolherbst@gmail.com>
+R:	Pekka Paalanen <ppaalanen@gmail.com>
+S:	Maintained
+L:	linux-kernel@vger.kernel.org
+L:	nouveau@lists.freedesktop.org
+F:	kernel/trace/trace_mmiotrace.c
+F:	include/linux/mmiotrace.h
+F:	arch/x86/mm/kmmio.c
+F:	arch/x86/mm/mmio-mod.c
+F:	arch/x86/mm/testmmiotrace.c
+
 TRIVIAL PATCHES
 M:	Jiri Kosina <trivial@kernel.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1f7fdb91a818..e4393bfc7f0d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -336,7 +336,7 @@ static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
 {
 	unsigned int eax, ebx, ecx, edx;
 
-	if (c->cpuid_level < 4)
+	if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
 		return 1;
 
 	/* Intel has a non-standard dependency on %ecx for this CPUID level. */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a2065d3b3b39..0e4329ed91ef 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -332,6 +332,11 @@ static void __init smp_init_package_map(void)
 	 * primary cores.
 	 */
 	ncpus = boot_cpu_data.x86_max_cores;
+	if (!ncpus) {
+		pr_warn("x86_max_cores == zero !?!?");
+		ncpus = 1;
+	}
+
 	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
 
 	/*

* [GIT PULL] x86 fixes
@ 2016-05-06 19:20 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-05-06 19:20 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 886123fb3a8656699dff40afa0573df359abeb18 x86/tsc: Read all ratio bits from MSR_PLATFORM_INFO

This tree contains two fixes: a boot fix for older SGI/UV systems, and an APIC 
calibration fix.

 Thanks,

	Ingo

------------------>
Alex Thorlton (1):
      x86/platform/UV: Bring back the call to map_low_mmrs in uv_system_init

Chen Yu (1):
      x86/tsc: Read all ratio bits from MSR_PLATFORM_INFO


 arch/x86/kernel/apic/x2apic_uv_x.c | 4 +---
 arch/x86/kernel/tsc_msr.c          | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8f4942e2bcbb..d7ce96a7daca 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -891,9 +891,7 @@ void __init uv_system_init(void)
 	}
 	pr_info("UV: Found %s hub\n", hub);
 
-	/* We now only need to map the MMRs on UV1 */
-	if (is_uv1_hub())
-		map_low_mmrs();
+	map_low_mmrs();
 
 	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
 	m_val = m_n_config.s.m_skt;
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 92ae6acac8a7..6aa0f4d9eea6 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -92,7 +92,7 @@ unsigned long try_msr_calibrate_tsc(void)
 
 	if (freq_desc_tables[cpu_index].msr_plat) {
 		rdmsr(MSR_PLATFORM_INFO, lo, hi);
-		ratio = (lo >> 8) & 0x1f;
+		ratio = (lo >> 8) & 0xff;
 	} else {
 		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 		ratio = (hi >> 8) & 0x1f;
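
For context: bits 15:8 of MSR_PLATFORM_INFO hold the maximum non-turbo ratio.
The old 0x1f mask kept only five of those eight bits, so any ratio above 31
was silently truncated and the TSC mis-calibrated. A standalone sketch with a
hypothetical MSR value:

  #include <stdio.h>

  int main(void)
  {
          unsigned int lo = 0x2400;        /* hypothetical low word, ratio 36 */

          printf("old: %u, new: %u\n",
                 (lo >> 8) & 0x1f,         /* 4  - top bits lost */
                 (lo >> 8) & 0xff);        /* 36 - full ratio    */
          return 0;
  }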

* [GIT PULL] x86 fixes
@ 2016-04-28 18:00 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-04-28 18:00 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 1bdb8970392a68489b469c3a330a1adb5ef61beb x86/apic: Handle zero vector gracefully in clear_vector_irq()

Two boot crash fixes and an IRQ handling crash fix.

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      Revert "x86/mm/32: Set NX in __supported_pte_mask before enabling paging"

Keith Busch (1):
      x86/apic: Handle zero vector gracefully in clear_vector_irq()

Ross Lagerwall (1):
      xen/qspinlock: Don't kick CPU if IRQ is not initialized


 arch/x86/kernel/apic/vector.c | 3 ++-
 arch/x86/kernel/head_32.S     | 6 ------
 arch/x86/mm/setup_nx.c        | 5 +++--
 arch/x86/xen/spinlock.c       | 6 ++++++
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ad59d70bcb1a..ef495511f019 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -256,7 +256,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	struct irq_desc *desc;
 	int cpu, vector;
 
-	BUG_ON(!data->cfg.vector);
+	if (!data->cfg.vector)
+		return;
 
 	vector = data->cfg.vector;
 	for_each_cpu_and(cpu, data->domain, cpu_online_mask)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 54cdbd2003fe..af1112980dd4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -389,12 +389,6 @@ ENTRY(startup_32_smp)
 	/* Make changes effective */
 	wrmsr
 
-	/*
-	 * And make sure that all the mappings we set up have NX set from
-	 * the beginning.
-	 */
-	orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)
-
 enable_paging:
 
 /*
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 8bea84724a7d..f65a33f505b6 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -32,8 +32,9 @@ early_param("noexec", noexec_setup);
 
 void x86_configure_nx(void)
 {
-	/* If disable_nx is set, clear NX on all new mappings going forward. */
-	if (disable_nx)
+	if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
+		__supported_pte_mask |= _PAGE_NX;
+	else
 		__supported_pte_mask &= ~_PAGE_NX;
 }
 
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 9e2ba5c6e1dd..f42e78de1e10 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -27,6 +27,12 @@ static bool xen_pvspin = true;
 
 static void xen_qlock_kick(int cpu)
 {
+	int irq = per_cpu(lock_kicker_irq, cpu);
+
+	/* Don't kick if the target's kicker interrupt is not initialized. */
+	if (irq == -1)
+		return;
+
 	xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
 }
 

* [GIT PULL] x86 fixes
@ 2016-04-14 14:13 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-04-14 14:13 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: a3125494cff084b098c80bb36fbe2061ffed9d52 x86/mce: Avoid using object after free in genpool

Misc fixes: a binutils fix, an lguest fix, an mcelog fix and a fix adding 
missing documentation.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/lguest/interrupts_and_traps.c# f87e0434a3be: lguest, x86/entry/32: Fix ha
  drivers/lguest/lg.h                # f87e0434a3be: lguest, x86/entry/32: Fix ha
  drivers/lguest/x86/core.c          # f87e0434a3be: lguest, x86/entry/32: Fix ha

 Thanks,

	Ingo

------------------>
Dave Hansen (1):
      x86/mm/pkeys: Add missing Documentation

H.J. Lu (1):
      x86/build: Build compressed x86 kernels as PIE

Rusty Russell (1):
      lguest, x86/entry/32: Fix handling of guest syscalls using interrupt gates

Tony Luck (1):
      x86/mce: Avoid using object after free in genpool


 Documentation/x86/protection-keys.txt    | 27 +++++++++++++++++++++++++++
 arch/x86/boot/compressed/Makefile        | 14 +++++++++++++-
 arch/x86/boot/compressed/head_32.S       | 28 ++++++++++++++++++++++++++++
 arch/x86/boot/compressed/head_64.S       |  8 ++++++++
 arch/x86/kernel/cpu/mcheck/mce-genpool.c |  4 ++--
 drivers/lguest/interrupts_and_traps.c    |  6 +++++-
 drivers/lguest/lg.h                      |  1 +
 drivers/lguest/x86/core.c                |  6 +++++-
 8 files changed, 89 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/x86/protection-keys.txt

diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.txt
new file mode 100644
index 000000000000..c281ded1ba16
--- /dev/null
+++ b/Documentation/x86/protection-keys.txt
@@ -0,0 +1,27 @@
+Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature
+which will be found on future Intel CPUs.
+
+Memory Protection Keys provides a mechanism for enforcing page-based
+protections, but without requiring modification of the page tables
+when an application changes protection domains.  It works by
+dedicating 4 previously ignored bits in each page table entry to a
+"protection key", giving 16 possible keys.
+
+There is also a new user-accessible register (PKRU) with two separate
+bits (Access Disable and Write Disable) for each key.  Being a CPU
+register, PKRU is inherently thread-local, potentially giving each
+thread a different set of protections from every other thread.
+
+There are two new instructions (RDPKRU/WRPKRU) for reading and writing
+to the new register.  The feature is only available in 64-bit mode,
+even though there is theoretically space in the PAE PTEs.  These
+permissions are enforced on data access only and have no effect on
+instruction fetches.
+
+=========================== Config Option ===========================
+
+This config option adds approximately 1.5kb of text and 50 bytes of
+data to the executable.  A workload which does large O_DIRECT reads
+of holes in XFS files was run to exercise get_user_pages_fast().  No
+performance delta was observed with the config option
+enabled or disabled.
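
To make the RDPKRU paragraph above concrete, here is a hedged userspace
sketch of reading the register. The .byte sequence is the documented RDPKRU
encoding (0f 01 ee), ECX must be zero, and this only executes on hardware and
kernels with OSPKE enabled - which, per the text, is future hardware at the
time of this pull:

  #include <stdio.h>

  static unsigned int rdpkru(void)
  {
          unsigned int eax, edx;
          unsigned int ecx = 0;

          asm volatile(".byte 0x0f,0x01,0xee"       /* rdpkru */
                       : "=a" (eax), "=d" (edx) : "c" (ecx));
          (void)edx;                                /* rdpkru zeroes edx */
          return eax;
  }

  int main(void)
  {
          printf("PKRU = %#x\n", rdpkru()); /* 2 bits (AD/WD) per key */
          return 0;
  }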
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6915ff2bd996..8774cb23064f 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,7 +26,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
 	vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
-KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
+KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 cflags-$(CONFIG_X86_32) := -march=i386
 cflags-$(CONFIG_X86_64) := -mcmodel=small
@@ -40,6 +40,18 @@ GCOV_PROFILE := n
 UBSAN_SANITIZE :=n
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
+ifeq ($(CONFIG_RELOCATABLE),y)
+# If kernel is relocatable, build compressed kernel as PIE.
+ifeq ($(CONFIG_X86_32),y)
+LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
+else
+# To build 64-bit compressed kernel as PIE, we disable relocation
+# overflow check to avoid relocation overflow error with a new linker
+# command-line option, -z noreloc-overflow.
+LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
+	&& echo "-z noreloc-overflow -pie --no-dynamic-linker")
+endif
+endif
 LDFLAGS_vmlinux := -T
 
 hostprogs-y	:= mkpiggy
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 8ef964ddc18e..0256064da8da 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -31,6 +31,34 @@
 #include <asm/asm-offsets.h>
 #include <asm/bootparam.h>
 
+/*
+ * The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X
+ * relocation to get the symbol address in PIC.  When the compressed x86
+ * kernel isn't built as PIC, the linker optimizes R_386_GOT32X
+ * relocations to their fixed symbol addresses.  However, when the
+ * compressed x86 kernel is loaded at a different address, it leads
+ * to the following load failure:
+ *
+ *   Failed to allocate space for phdrs
+ *
+ * during the decompression stage.
+ *
+ * If the compressed x86 kernel is relocatable at run-time, it should be
+ * compiled with -fPIE, instead of -fPIC, if possible and should be built as
+ * Position Independent Executable (PIE) so that linker won't optimize
+ * R_386_GOT32X relocation to its fixed symbol address.  Older
+ * linkers generate R_386_32 relocations against locally defined symbols,
+ * _bss, _ebss, _got and _egot, in PIE.  It isn't wrong, just less
+ * optimal than R_386_RELATIVE.  But the x86 kernel fails to properly handle
+ * R_386_32 relocations when relocating the kernel.  To generate
+ * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as
+ * hidden:
+ */
+	.hidden _bss
+	.hidden _ebss
+	.hidden _got
+	.hidden _egot
+
 	__HEAD
 ENTRY(startup_32)
 #ifdef CONFIG_EFI_STUB
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index b0c0d16ef58d..86558a199139 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,14 @@
 #include <asm/asm-offsets.h>
 #include <asm/bootparam.h>
 
+/*
+ * Locally defined symbols should be marked hidden:
+ */
+	.hidden _bss
+	.hidden _ebss
+	.hidden _got
+	.hidden _egot
+
 	__HEAD
 	.code32
 ENTRY(startup_32)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 0a850100c594..2658e2af74ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ];
 void mce_gen_pool_process(void)
 {
 	struct llist_node *head;
-	struct mce_evt_llist *node;
+	struct mce_evt_llist *node, *tmp;
 	struct mce *mce;
 
 	head = llist_del_all(&mce_event_llist);
@@ -37,7 +37,7 @@ void mce_gen_pool_process(void)
 		return;
 
 	head = llist_reverse_order(head);
-	llist_for_each_entry(node, head, llnode) {
+	llist_for_each_entry_safe(node, tmp, head, llnode) {
 		mce = &node->mce;
 		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
 		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
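
The bug fixed above is the classic iterate-and-free pattern: the loop body
hands `node` back to the pool, then llist_for_each_entry() dereferences the
freed node to find the next one. The _safe variant caches the next pointer
before the body runs. A standalone illustration with plain malloc/free:

  #include <stdio.h>
  #include <stdlib.h>

  struct node { struct node *next; int val; };

  int main(void)
  {
          struct node *b = malloc(sizeof(*b));
          struct node *a = malloc(sizeof(*a));
          struct node *n, *tmp;

          b->next = NULL; b->val = 2;
          a->next = b;    a->val = 1;

          for (n = a; n; n = tmp) {
                  tmp = n->next;  /* cache next BEFORE freeing n */
                  printf("%d\n", n->val);
                  free(n);        /* safe: n is not touched again */
          }
          return 0;
  }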
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index eb934b0242e0..67392b6ab845 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -331,7 +331,7 @@ void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
  * Actually now I think of it, it's possible that Ron *is* half the Plan 9
  * userbase.  Oh well.
  */
-static bool could_be_syscall(unsigned int num)
+bool could_be_syscall(unsigned int num)
 {
 	/* Normal Linux IA32_SYSCALL_VECTOR or reserved vector? */
 	return num == IA32_SYSCALL_VECTOR || num == syscall_vector;
@@ -416,6 +416,10 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
  *
  * This routine indicates if a particular trap number could be delivered
  * directly.
+ *
+ * Unfortunately, Linux 4.6 started using an interrupt gate instead of a
+ * trap gate for syscalls, so this trick is ineffective.  See Mastery for
+ * how we could do this anyway...
  */
 static bool direct_trap(unsigned int num)
 {
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index ac8ad0461e80..69b3814afd2f 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -167,6 +167,7 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
 bool send_notify_to_eventfd(struct lg_cpu *cpu);
 void init_clockdev(struct lg_cpu *cpu);
 bool check_syscall_vector(struct lguest *lg);
+bool could_be_syscall(unsigned int num);
 int init_interrupts(void);
 void free_interrupts(void);
 
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 6a4cd771a2be..adc162c7040d 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -429,8 +429,12 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 			return;
 		break;
 	case 32 ... 255:
+		/* This might be a syscall. */
+		if (could_be_syscall(cpu->regs->trapnum))
+			break;
+
 		/*
-		 * These values mean a real interrupt occurred, in which case
+		 * Other values mean a real interrupt occurred, in which case
 		 * the Host handler has already been run. We just do a
 		 * friendly check if another process should now be run, then
 		 * return to run the Guest again.

* [GIT PULL] x86 fixes
@ 2016-03-24  8:01 Ingo Molnar
  0 siblings, 0 replies; 505+ messages in thread
From: Ingo Molnar @ 2016-03-24  8:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 9da77666d6975219281fd400eb9608a047337414 x86/msr: Remove unused native_read_tscp()

Misc fixes:

 - fix hotplug bugs
 - fix irq live lock
 - fix various topology handling bugs
 - fix APIC ACK ordering
 - fix PV iopl handling
 - fix speling
 - fix/tweak memcpy_mcsafe() return value
 - fix fbcon bug
 - remove stray prototypes

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/nmi.h                # f09e3f4fe42c: x86/apic: Remove declaration
  tools/testing/selftests/x86/Makefile# b08983015cdd: selftests/x86: Add an iopl t
  tools/testing/selftests/x86/iopl.c # b08983015cdd: selftests/x86: Add an iopl t

 Thanks,

	Ingo

------------------>
Adam Buchbinder (1):
      x86: Fix misspellings in comments

Andy Lutomirski (3):
      selftests/x86: Add an iopl test
      x86/iopl/64: Properly context-switch IOPL on Xen PV
      x86/iopl: Fix iopl capability check on Xen PV

Dave Jones (1):
      x86/apic: Fix suspicious RCU usage in smp_trace_call_function_interrupt()

Nicolai Stange (1):
      arch/x86/irq: Purge useless handler declarations from hw_irq.h

Peter Zijlstra (2):
      x86/topology: Fix logical package mapping
      x86/topology: Fix Intel HT disable

Prarit Bhargava (1):
      x86/msr: Remove unused native_read_tscp()

Thomas Gleixner (7):
      x86/tsc: Prevent NULL pointer deref in calibrate_delay_is_known()
      x86/irq: Cure live lock in fixup_irqs()
      x86/topology: Use total_cpus not nr_cpu_ids for logical packages
      x86/apb/timer: Use proper mask to modify hotplug action
      x86/apic/uv: Fix the hotplug notifier
      x86/hpet: Use proper mask to modify hotplug action
      x86/oprofile/nmi: Add missing hotplug FROZEN handling

Tony Luck (1):
      x86/mm, x86/mce: Fix return type/value for memcpy_mcsafe()

Vitaly Kuznetsov (1):
      x86/video: Don't assume all FB devices are PCI devices

Yaowei Bai (1):
      x86/apic: Remove declaration of unused hw_nmi_is_cpu_stuck


 arch/x86/entry/vdso/vclock_gettime.c               |   2 +-
 arch/x86/events/intel/lbr.c                        |   2 +-
 arch/x86/events/perf_event.h                       |   2 +-
 arch/x86/include/asm/apic.h                        |   2 +-
 arch/x86/include/asm/ftrace.h                      |   2 +-
 arch/x86/include/asm/hw_irq.h                      |  15 +--
 arch/x86/include/asm/msr.h                         |   8 --
 arch/x86/include/asm/rwsem.h                       |   2 +-
 arch/x86/include/asm/string_64.h                   |   4 +-
 arch/x86/include/asm/xen/hypervisor.h              |   2 +
 arch/x86/kernel/acpi/boot.c                        |   4 +-
 arch/x86/kernel/apb_timer.c                        |   2 +-
 arch/x86/kernel/apic/apic.c                        |   2 +-
 arch/x86/kernel/apic/vector.c                      |  88 +++++++++++---
 arch/x86/kernel/apic/x2apic_uv_x.c                 |   5 +-
 arch/x86/kernel/apm_32.c                           |   2 +-
 arch/x86/kernel/cpu/amd.c                          |   2 +-
 arch/x86/kernel/cpu/common.c                       |   4 +-
 arch/x86/kernel/cpu/mtrr/generic.c                 |   2 +-
 arch/x86/kernel/early_printk.c                     |   2 +-
 arch/x86/kernel/fpu/regset.c                       |   2 +-
 arch/x86/kernel/hpet.c                             |   2 +-
 arch/x86/kernel/ioport.c                           |  12 +-
 arch/x86/kernel/kexec-bzimage64.c                  |   2 +-
 arch/x86/kernel/kgdb.c                             |   4 +-
 arch/x86/kernel/kvmclock.c                         |   2 +-
 arch/x86/kernel/process_64.c                       |  12 ++
 arch/x86/kernel/smpboot.c                          |  27 +++--
 arch/x86/kernel/tsc.c                              |   8 +-
 arch/x86/kvm/mmu.c                                 |   4 +-
 arch/x86/kvm/vmx.c                                 |   4 +-
 arch/x86/kvm/x86.c                                 |   2 +-
 arch/x86/lib/memcpy_64.S                           |   7 +-
 arch/x86/lib/memset_64.S                           |   2 +-
 arch/x86/mm/mpx.c                                  |   4 +-
 arch/x86/mm/pat.c                                  |   2 +-
 arch/x86/oprofile/nmi_int.c                        |   3 +-
 .../intel-mid/device_libs/platform_bma023.c        |   2 +-
 .../intel-mid/device_libs/platform_emc1403.c       |   2 +-
 .../intel-mid/device_libs/platform_gpio_keys.c     |   2 +-
 .../intel-mid/device_libs/platform_lis331.c        |   2 +-
 .../intel-mid/device_libs/platform_max7315.c       |   2 +-
 .../intel-mid/device_libs/platform_mpu3050.c       |   2 +-
 .../platform/intel-mid/device_libs/platform_msic.c |   2 +-
 .../intel-mid/device_libs/platform_msic_audio.c    |   2 +-
 .../intel-mid/device_libs/platform_msic_battery.c  |   2 +-
 .../intel-mid/device_libs/platform_msic_gpio.c     |   2 +-
 .../intel-mid/device_libs/platform_msic_ocd.c      |   2 +-
 .../device_libs/platform_msic_power_btn.c          |   2 +-
 .../intel-mid/device_libs/platform_msic_thermal.c  |   2 +-
 .../intel-mid/device_libs/platform_pmic_gpio.c     |   2 +-
 .../intel-mid/device_libs/platform_tc35876x.c      |   2 +-
 .../intel-mid/device_libs/platform_tca6416.c       |   2 +-
 arch/x86/purgatory/stack.S                         |   2 +-
 arch/x86/video/fbdev.c                             |  18 ++-
 arch/x86/xen/enlighten.c                           |   2 +-
 arch/x86/xen/mmu.c                                 |   8 +-
 arch/x86/xen/xen-head.S                            |   2 +-
 include/linux/nmi.h                                |   1 -
 tools/testing/selftests/x86/Makefile               |   2 +-
 tools/testing/selftests/x86/iopl.c                 | 135 +++++++++++++++++++++
 61 files changed, 329 insertions(+), 126 deletions(-)
 create mode 100644 tools/testing/selftests/x86/iopl.c

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 1a50e09c945b..03c3eb77bfce 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -178,7 +178,7 @@ notrace static cycle_t vread_tsc(void)
 
 	/*
 	 * GCC likes to generate cmov here, but this branch is extremely
-	 * predictable (it's just a funciton of time and the likely is
+	 * predictable (it's just a function of time and the likely is
 	 * very likely) and there's a data dependence, so force GCC
 	 * to generate a branch instead.  I don't barrier() because
 	 * we don't actually need a barrier, and if this function
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 69dd11887dd1..6c3b7c1780c9 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -649,7 +649,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 
 /*
  * return the type of control flow change at address "from"
- * intruction is not necessarily a branch (in case of interrupt).
+ * instruction is not necessarily a branch (in case of interrupt).
  *
  * The branch type returned also includes the priv level of the
  * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 68155cafa8a1..ba6ef18528c9 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -272,7 +272,7 @@ struct cpu_hw_events {
  * events to select for counter rescheduling.
  *
  * Care must be taken as the rescheduling algorithm is O(n!) which
- * will increase scheduling cycles for an over-commited system
+ * will increase scheduling cycles for an over-committed system
  * dramatically.  The number of such EVENT_CONSTRAINT_OVERLAP() macros
  * and its counter masks must be kept at a minimum.
  */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 0899cfc8dfe8..98f25bbafac4 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -643,8 +643,8 @@ static inline void entering_irq(void)
 
 static inline void entering_ack_irq(void)
 {
-	ack_APIC_irq();
 	entering_irq();
+	ack_APIC_irq();
 }
 
 static inline void ipi_entering_ack_irq(void)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 24938852db30..abbad505dd24 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -52,7 +52,7 @@ int ftrace_int3_handler(struct pt_regs *regs);
  * this screws up the trace output when tracing a ia32 task.
  * Instead of reporting bogus syscalls, just do not trace them.
  *
- * If the user realy wants these, then they should use the
+ * If the user really wants these, then they should use the
  * raw syscall tracepoints with filtering.
  */
 #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1815b736269d..b90e1053049b 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -141,6 +141,7 @@ struct irq_alloc_info {
 struct irq_cfg {
 	unsigned int		dest_apicid;
 	u8			vector;
+	u8			old_vector;
 };
 
 extern struct irq_cfg *irq_cfg(unsigned int irq);
@@ -168,20 +169,6 @@ extern atomic_t irq_mis_count;
 
 extern void elcr_set_level_irq(unsigned int irq);
 
-/* SMP */
-extern __visible void smp_apic_timer_interrupt(struct pt_regs *);
-extern __visible void smp_spurious_interrupt(struct pt_regs *);
-extern __visible void smp_x86_platform_ipi(struct pt_regs *);
-extern __visible void smp_error_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_IO_APIC
-extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
-#endif
-#ifdef CONFIG_SMP
-extern __visible void smp_reschedule_interrupt(struct pt_regs *);
-extern __visible void smp_call_function_interrupt(struct pt_regs *);
-extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
-#endif
-
 extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
 #define trace_irq_entries_start irq_entries_start
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 93fb7c1cffda..7a79ee2778b3 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -42,14 +42,6 @@ struct saved_msrs {
 	struct saved_msr *array;
 };
 
-static inline unsigned long long native_read_tscp(unsigned int *aux)
-{
-	unsigned long low, high;
-	asm volatile(".byte 0x0f,0x01,0xf9"
-		     : "=a" (low), "=d" (high), "=c" (*aux));
-	return low | ((u64)high << 32);
-}
-
 /*
  * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
  * constraint has different meanings. For i386, "A" means exactly
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index cad82c9c2fde..ceec86eb68e9 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -25,7 +25,7 @@
  * This should be totally fair - if anything is waiting, a process that wants a
  * lock will go to the back of the queue. When the currently active lock is
  * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consequtive readers at the
+ * that will be woken up; if there's a bunch of consecutive readers at the
  * front, then they'll all be woken up, but no other readers will be.
  */
 
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index ca6ba3607705..90dbbd9666d4 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -87,9 +87,9 @@ int strcmp(const char *cs, const char *ct);
  *
  * Low level memory copy function that catches machine checks
  *
- * Return true for success, false for fail
+ * Return 0 for success, -EFAULT for fail
  */
-bool memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
 
 #endif /* __KERNEL__ */
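
The bool -> int change flips the calling convention to the kernel's usual
0/-errno style. A self-contained sketch of how callers now test the result
(the stub below just succeeds; the real routine returns -EFAULT when a
machine check fires mid-copy):

  #include <stdio.h>
  #include <string.h>

  /* stand-in for the kernel routine, same new signature */
  static int memcpy_mcsafe(void *dst, const void *src, size_t cnt)
  {
          memcpy(dst, src, cnt);
          return 0;               /* 0 on success, -EFAULT on #MC */
  }

  int main(void)
  {
          char d[4];

          if (memcpy_mcsafe(d, "abc", 4)) /* nonzero now means failure */
                  return 1;
          puts(d);
          return 0;
  }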
 
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 8b2d4bea9962..39171b3646bb 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,4 +62,6 @@ void xen_arch_register_cpu(int num);
 void xen_arch_unregister_cpu(int num);
 #endif
 
+extern void xen_set_iopl_mask(unsigned mask);
+
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e75907601a41..8c2f1ef6ca23 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -956,7 +956,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 
 	/*
 	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
-	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
 	 */
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
@@ -984,7 +984,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 
 	/*
 	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
-	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
 	 */
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 222a57076039..cefacbad1531 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -221,7 +221,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
 	unsigned long cpu = (unsigned long)hcpu;
 	struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
 
-	switch (action & 0xf) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DEAD:
 		dw_apb_clockevent_pause(adev->timer);
 		if (system_state == SYSTEM_RUNNING) {
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 531b9611c51d..d356987a04e9 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1611,7 +1611,7 @@ void __init enable_IR_x2apic(void)
 	legacy_pic->mask_all();
 	mask_ioapic_entries();
 
-	/* If irq_remapping_prepare() succeded, try to enable it */
+	/* If irq_remapping_prepare() succeeded, try to enable it */
 	if (ir_stat >= 0)
 		ir_stat = try_to_enable_IR();
 	/* ir_stat contains the remap mode or an error code */
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3b670df4ba7b..ad59d70bcb1a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -213,6 +213,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
 	d->move_in_progress = !cpumask_empty(d->old_domain);
+	d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0;
 	d->cfg.vector = vector;
 	cpumask_copy(d->domain, vector_cpumask);
 success:
@@ -655,46 +656,97 @@ void irq_complete_move(struct irq_cfg *cfg)
 }
 
 /*
- * Called with @desc->lock held and interrupts disabled.
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */
 void irq_force_complete_move(struct irq_desc *desc)
 {
 	struct irq_data *irqdata = irq_desc_get_irq_data(desc);
 	struct apic_chip_data *data = apic_chip_data(irqdata);
 	struct irq_cfg *cfg = data ? &data->cfg : NULL;
+	unsigned int cpu;
 
 	if (!cfg)
 		return;
 
-	__irq_complete_move(cfg, cfg->vector);
-
 	/*
 	 * This is tricky. If the cleanup of @data->old_domain has not been
 	 * done yet, then the following setaffinity call will fail with
 	 * -EBUSY. This can leave the interrupt in a stale state.
 	 *
-	 * The cleanup cannot make progress because we hold @desc->lock. So in
-	 * case @data->old_domain is not yet cleaned up, we need to drop the
-	 * lock and acquire it again. @desc cannot go away, because the
-	 * hotplug code holds the sparse irq lock.
+	 * All CPUs are stuck in stop machine with interrupts disabled so
+	 * calling __irq_complete_move() would be completely pointless.
 	 */
 	raw_spin_lock(&vector_lock);
-	/* Clean out all offline cpus (including ourself) first. */
+	/*
+	 * Clean out all offline cpus (including the outgoing one) from the
+	 * old_domain mask.
+	 */
 	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
-	while (!cpumask_empty(data->old_domain)) {
+
+	/*
+	 * If move_in_progress is cleared and the old_domain mask is empty,
+	 * then there is nothing to cleanup. fixup_irqs() will take care of
+	 * the stale vectors on the outgoing cpu.
+	 */
+	if (!data->move_in_progress && cpumask_empty(data->old_domain)) {
 		raw_spin_unlock(&vector_lock);
-		raw_spin_unlock(&desc->lock);
-		cpu_relax();
-		raw_spin_lock(&desc->lock);
+		return;
+	}
+
+	/*
+	 * 1) The interrupt is in move_in_progress state. That means that we
+	 *    have not seen an interrupt since the io_apic was reprogrammed to
+	 *    the new vector.
+	 *
+	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+	 *    have not been processed yet.
+	 */
+	if (data->move_in_progress) {
 		/*
-		 * Reevaluate apic_chip_data. It might have been cleared after
-		 * we dropped @desc->lock.
+		 * In theory there is a race:
+		 *
+		 * set_ioapic(new_vector) <-- Interrupt is raised before update
+		 *			      is effective, i.e. it's raised on
+		 *			      the old vector.
+		 *
+		 * So if the target cpu cannot handle that interrupt before
+		 * the old vector is cleaned up, we get a spurious interrupt
+		 * and in the worst case the ioapic irq line becomes stale.
+		 *
+		 * But in case of cpu hotplug this should be a non-issue
+		 * because if the affinity update happens right before all
+		 * cpus rendezvous in stop machine, there is no way that the
+		 * interrupt can be blocked on the target cpu because all cpus
+		 * loop first with interrupts enabled in stop machine, so the
+		 * old vector is not yet cleaned up when the interrupt fires.
+		 *
+		 * So the only way to run into this issue is if the delivery
+		 * of the interrupt on the apic/system bus would be delayed
+		 * beyond the point where the target cpu disables interrupts
+		 * in stop machine. I doubt that it can happen, but at least
+		 * there is a theoretical chance. Virtualization might be
+		 * able to expose this, but AFAICT the IOAPIC emulation is not
+		 * as stupid as the real hardware.
+		 *
+		 * Anyway, there is nothing we can do about that at this point
+		 * w/o refactoring the whole fixup_irq() business completely.
+		 * We print at least the irq number and the old vector number,
+		 * so we have the necessary information when a problem in that
+		 * area arises.
 		 */
-		data = apic_chip_data(irqdata);
-		if (!data)
-			return;
-		raw_spin_lock(&vector_lock);
+		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+			irqdata->irq, cfg->old_vector);
 	}
+	/*
+	 * If old_domain is not empty, then other cpus still have the irq
+	 * descriptor set in their vector array. Clean it up.
+	 */
+	for_each_cpu(cpu, data->old_domain)
+		per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED;
+
+	/* Clean up the leftovers of the (half-finished) move */
+	cpumask_clear(data->old_domain);
+	data->move_in_progress = 0;
 	raw_spin_unlock(&vector_lock);
 }
 #endif
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 624db00583f4..8f4942e2bcbb 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -792,7 +792,8 @@ static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
 {
 	long cpu = (long)hcpu;
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_FAILED:
 	case CPU_ONLINE:
 		uv_heartbeat_enable(cpu);
 		break;
@@ -860,7 +861,7 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode,
  */
 void uv_cpu_init(void)
 {
-	/* CPU 0 initilization will be done via uv_system_init. */
+	/* CPU 0 initialization will be done via uv_system_init. */
 	if (!uv_blade_info)
 		return;
 
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 052c9c3026cc..9307f182fe30 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1088,7 +1088,7 @@ static int apm_get_battery_status(u_short which, u_short *status,
  *	@device: identity of device
  *	@enable: on/off
  *
- *	Activate or deactive power management on either a specific device
+ *	Activate or deactivate power management on either a specific device
  *	or the entire system (%APM_DEVICE_ALL).
  */
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 97c59fd60702..b39338c4b260 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -82,7 +82,7 @@ static void init_amd_k5(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_32
 /*
  * General Systems BIOSen alias the cpu frequency registers
- * of the Elan at 0x000df000. Unfortuantly, one of the Linux
+ * of the Elan at 0x000df000. Unfortunately, one of the Linux
  * drivers subsequently pokes it, and changes the CPU speed.
  * Workaround : Remove the unneeded alias.
  */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 249461f95851..62590aa064c8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -925,7 +925,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	if (this_cpu->c_identify)
 		this_cpu->c_identify(c);
 
-	/* Clear/Set all flags overriden by options, after probe */
+	/* Clear/Set all flags overridden by options, after probe */
 	for (i = 0; i < NCAPINTS; i++) {
 		c->x86_capability[i] &= ~cpu_caps_cleared[i];
 		c->x86_capability[i] |= cpu_caps_set[i];
@@ -984,7 +984,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	x86_init_cache_qos(c);
 
 	/*
-	 * Clear/Set all flags overriden by options, need do it
+	 * Clear/Set all flags overridden by options, need do it
 	 * before following smp all cpus cap AND.
 	 */
 	for (i = 0; i < NCAPINTS; i++) {
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fcbcb2f678ca..19f57360dfd2 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -42,7 +42,7 @@ EXPORT_SYMBOL_GPL(mtrr_state);
  * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
  * Opteron Processors" (26094 Rev. 3.30 February 2006), section
  * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
- * to 1 during BIOS initalization of the fixed MTRRs, then cleared to
+ * to 1 during BIOS initialization of the fixed MTRRs, then cleared to
  * 0 for operation."
  */
 static inline void k8_check_syscfg_dram_mod_en(void)
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 21bf92490a7b..8a121991e5ba 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -287,7 +287,7 @@ static __init void early_pci_serial_init(char *s)
 	}
 
 	/*
-	 * Lastly, initalize the hardware
+	 * Lastly, initialize the hardware
 	 */
 	if (*s) {
 		if (strcmp(s, "nocfg") == 0)
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 0bc3490420c5..8bd1c003942a 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -8,7 +8,7 @@
 /*
  * The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
  * as the "regset->n" for the xstate regset will be updated based on the feature
- * capabilites supported by the xsave.
+ * capabilities supported by the xsave.
  */
 int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index be0ebbb6d1d1..a1f0e4a5c47e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -717,7 +717,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
 	struct hpet_work_struct work;
 	struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
 
-	switch (action & 0xf) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
 		INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
 		init_completion(&work.complete);
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 37dae792dbbe..589b3193f102 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -96,9 +96,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 SYSCALL_DEFINE1(iopl, unsigned int, level)
 {
 	struct pt_regs *regs = current_pt_regs();
-	unsigned int old = (regs->flags >> 12) & 3;
 	struct thread_struct *t = &current->thread;
 
+	/*
+	 * Careful: the IOPL bits in regs->flags are undefined under Xen PV
+	 * and changing them has no effect.
+	 */
+	unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+
 	if (level > 3)
 		return -EINVAL;
 	/* Trying to gain more privileges? */
@@ -106,8 +111,9 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
-	t->iopl = level << 12;
+	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
+		(level << X86_EFLAGS_IOPL_BIT);
+	t->iopl = level << X86_EFLAGS_IOPL_BIT;
 	set_iopl_mask(t->iopl);
 
 	return 0;
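
Beyond the Xen PV fix itself, note that the hunk replaces the magic '12'
with X86_EFLAGS_IOPL_BIT and reads the old level from t->iopl rather than
regs->flags, since (per the new comment) those flag bits are undefined under
Xen PV. For the constant: the IOPL field occupies EFLAGS bits 13:12, so a
quick check of the shift:

  #include <stdio.h>

  #define X86_EFLAGS_IOPL_BIT 12

  int main(void)
  {
          unsigned int level = 3;

          /* 0x3000 == X86_EFLAGS_IOPL, both IOPL bits set */
          printf("%#x\n", level << X86_EFLAGS_IOPL_BIT);
          return 0;
  }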
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 0f8a6bbaaa44..2af478e3fd4e 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -271,7 +271,7 @@ static int bzImage64_probe(const char *buf, unsigned long len)
 	int ret = -ENOEXEC;
 	struct setup_header *header;
 
-	/* kernel should be atleast two sectors long */
+	/* kernel should be at least two sectors long */
 	if (len < 2 * 512) {
 		pr_err("File is too short to be a bzImage\n");
 		return ret;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index ed15cd486d06..2da6ee9ae69b 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -609,9 +609,9 @@ static struct notifier_block kgdb_notifier = {
 };
 
 /**
- *	kgdb_arch_init - Perform any architecture specific initalization.
+ *	kgdb_arch_init - Perform any architecture specific initialization.
  *
- *	This function will handle the initalization of any architecture
+ *	This function will handle the initialization of any architecture
  *	specific callbacks.
  */
 int kgdb_arch_init(void)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 72cef58693c7..1d39bfbd26bb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -226,7 +226,7 @@ static void kvm_setup_secondary_clock(void)
  * registered memory location. If the guest happens to shutdown, this memory
  * won't be valid. In cases like kexec, in which you install a new kernel, this
  * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writting anything
+ * kind of shutdown from our side, we unregister the clock by writing anything
  * that does not have the 'enable' bit set in the msr
  */
 #ifdef CONFIG_KEXEC_CORE
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b9d99e0f82c4..9f751876066f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -48,6 +48,7 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
+#include <asm/xen/hypervisor.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -411,6 +412,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+#ifdef CONFIG_XEN
+	/*
+	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
+	 * current_pt_regs()->flags may not match the current task's
+	 * intended IOPL.  We need to switch it manually.
+	 */
+	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
+		     prev->iopl != next->iopl))
+		xen_set_iopl_mask(next->iopl);
+#endif
+
 	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
 		/*
 		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 643dbdccf4bc..b2c99f811c3f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -274,11 +274,6 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
 	if (test_and_set_bit(pkg, physical_package_map))
 		goto found;
 
-	if (pkg < __max_logical_packages) {
-		set_bit(pkg, logical_package_map);
-		physical_to_logical_pkg[pkg] = pkg;
-		goto found;
-	}
 	new = find_first_zero_bit(logical_package_map, __max_logical_packages);
 	if (new >= __max_logical_packages) {
 		physical_to_logical_pkg[pkg] = -1;
@@ -317,9 +312,27 @@ static void __init smp_init_package_map(void)
 	/*
 	 * Today neither Intel nor AMD support heterogenous systems. That
 	 * might change in the future....
+	 *
+	 * While ideally we'd want '* smp_num_siblings' in the below @ncpus
+	 * computation, this won't actually work since some Intel BIOSes
+	 * report inconsistent HT data when they disable HT.
+	 *
+	 * In particular, they reduce the APIC-IDs to only include the cores,
+	 * but leave the CPUID topology to say there are (2) siblings.
+	 * This means we don't know how many threads there will be until
+	 * after the APIC enumeration.
+	 *
+	 * By not including this we'll sometimes over-estimate the number of
+	 * logical packages by the amount of !present siblings, but this is
+	 * still better than MAX_LOCAL_APIC.
+	 *
+	 * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
+	 * on the command line leading to a similar issue as the HT disable
+	 * problem because the hyperthreads are usually enumerated after the
+	 * primary cores.
 	 */
-	ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
-	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+	ncpus = boot_cpu_data.x86_max_cores;
+	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
 
 	/*
 	 * Possibly larger than what we need as the number of apic ids per
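
Worked through on a hypothetical box, the new computation over-estimates
exactly as the comment predicts, which is the intended trade-off:

  #include <stdio.h>

  #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

  int main(void)
  {
          /* 2 packages, 4 cores each, 2 threads per core */
          int total_cpus = 16;  /* APIC enumeration: possible CPUs */
          int ncpus = 4;        /* x86_max_cores, siblings excluded */

          /* 4 logical packages for 2 physical ones - high, but safe */
          printf("%d\n", DIV_ROUND_UP(total_cpus, ncpus));
          return 0;
  }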
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 56380440d862..c9c4c7ce3eb2 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -881,7 +881,7 @@ void tsc_restore_sched_clock_state(void)
 	local_irq_save(flags);
 
 	/*
-	 * We're comming out of suspend, there's no concurrency yet; don't
+	 * We're coming out of suspend, there's no concurrency yet; don't
 	 * bother being nice about the RCU stuff, just write to both
 	 * data fields.
 	 */
@@ -1306,11 +1306,15 @@ void __init tsc_init(void)
 unsigned long calibrate_delay_is_known(void)
 {
 	int sibling, cpu = smp_processor_id();
+	struct cpumask *mask = topology_core_cpumask(cpu);
 
 	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
 		return 0;
 
-	sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+	if (!mask)
+		return 0;
+
+	sibling = cpumask_any_but(mask, cpu);
 	if (sibling < nr_cpu_ids)
 		return cpu_data(sibling).loops_per_jiffy;
 	return 0;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1e7a49bfc94f..ddb3291d49c9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -478,7 +478,7 @@ static bool spte_is_locklessly_modifiable(u64 spte)
 static bool spte_has_volatile_bits(u64 spte)
 {
 	/*
-	 * Always atomicly update spte if it can be updated
+	 * Always atomically update spte if it can be updated
 	 * out of mmu-lock, it can ensure dirty bit is not lost,
 	 * also, it can help us to get a stable is_writable_pte()
 	 * to ensure tlb flush is not missed.
@@ -549,7 +549,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 
 	/*
 	 * For the spte updated out of mmu-lock is safe, since
-	 * we always atomicly update it, see the comments in
+	 * we always atomically update it, see the comments in
 	 * spte_has_volatile_bits().
 	 */
 	if (spte_is_locklessly_modifiable(old_spte) &&
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9bd8f44baded..1735ae9d684a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5496,7 +5496,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 		return kvm_set_cr4(vcpu, val);
 }
 
-/* called to set cr0 as approriate for clts instruction exit. */
+/* called to set cr0 as appropriate for clts instruction exit. */
 static void handle_clts(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu)) {
@@ -7244,7 +7244,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 	/* The value to write might be 32 or 64 bits, depending on L1's long
 	 * mode, and eventually we need to write that into a field of several
 	 * possible lengths. The code below first zero-extends the value to 64
-	 * bit (field_value), and then copies only the approriate number of
+	 * bit (field_value), and then copies only the appropriate number of
 	 * bits into the vmcs12 field.
 	 */
 	u64 field_value = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eaf6ee8c28b8..4838d35c9641 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1562,7 +1562,7 @@ static cycle_t read_tsc(void)
 
 	/*
 	 * GCC likes to generate cmov here, but this branch is extremely
-	 * predictable (it's just a funciton of time and the likely is
+	 * predictable (it's just a function of time and the likely is
 	 * very likely) and there's a data dependence, so force GCC
 	 * to generate a branch instead.  I don't barrier() because
 	 * we don't actually need a barrier, and if this function
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index cbb8ee5830ff..2ec0b0abbfaa 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,6 +1,7 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
+#include <asm/errno.h>