* [PATCH] x86: disable IRQs before changing CR4
@ 2017-11-17 18:19 ` Nadav Amit
From: Nadav Amit @ 2017-11-17 18:19 UTC (permalink / raw)
  To: linux-edac, kvm
  Cc: nadav.amit, Nadav Amit, Andy Lutomirski, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin, x86, Tony Luck, Borislav Petkov,
	Paolo Bonzini, Radim Krčmář

CR4 changes need to be performed while IRQs are disabled in order to
update the CR4 shadow and the actual register atomically. In fact,
disabled IRQs are needed regardless of CR4 shadowing, since CR4 updates
are performed in a read-modify-write manner.

Currently, however, this is not the case, as can be demonstrated by
adding warnings that fire when CR4 updates are performed while
interrupts are enabled. It also appears that CR4 changes with interrupts
enabled can be triggered by the user (PR_SET_TSC).
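
A minimal userspace trigger, for illustration (a sketch; PR_TSC_SIGSEGV
causes the kernel to set CR4.TSD via disable_TSC() for the calling
task):

  #include <sys/prctl.h>

  int main(void)
  {
          /*
           * Forbid RDTSC for this task: the kernel sets TIF_NOTSC and
           * flips CR4.TSD on the current CPU, and again on every
           * context switch of this task.
           */
          return prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
  }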

If CR4 updates are done while interrupts are enabled, an interrupt can
be delivered between the read of CR4 (or its shadow) and the write of
the modified value back to CR4. If the interrupt handler changes CR4,
the deferred write then clobbers the handler's change with a stale
value.
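
Schematically, the lost update looks as follows (a sketch of the
current cr4_set_bits() flow, with a hypothetical interrupt handler that
changes CR4 persistently):

  cr4 = this_cpu_read(cpu_tlbstate.cr4); /* e.g., reads PGE=1 */
  cr4 |= X86_CR4_TSD;
  /*
   * <-- IRQ delivered here: the handler runs
   *     cr4_clear_bits(X86_CR4_PGE) and leaves PGE=0.
   */
  this_cpu_write(cpu_tlbstate.cr4, cr4); /* stale value: PGE=1 */
  __write_cr4(cr4);                      /* the handler's change is lost */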

It is not clear whether any current interrupt handler modifies CR4
without immediately restoring the original value. However, a recent
patch considered doing just that. Prevent the issue by adding a debug
warning if CR4 is updated with interrupts enabled, renaming the CR4
manipulation functions to reflect that they require disabled IRQs, and
fixing the callers.

Note that in some cases, e.g., kvm_cpu_vmxon(), saving and restoring
the IRQ flags could probably have been avoided. Yet, since these calls
do not appear to be on a hot path, save and restore the IRQ flags to be
on the safe side.

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Radim Krčmář" <rkrcmar@redhat.com>

Signed-off-by: Nadav Amit <namit@vmware.com>
---
 arch/x86/include/asm/mmu_context.h   |  4 ++--
 arch/x86/include/asm/tlbflush.h      | 31 +++++++++++++++++++----------
 arch/x86/include/asm/virtext.h       |  2 +-
 arch/x86/kernel/cpu/common.c         | 38 ++++++++++++++++++++++++++----------
 arch/x86/kernel/cpu/mcheck/mce.c     |  5 ++++-
 arch/x86/kernel/cpu/mcheck/p5.c      |  6 +++++-
 arch/x86/kernel/cpu/mcheck/winchip.c |  5 ++++-
 arch/x86/kernel/fpu/init.c           |  2 +-
 arch/x86/kernel/fpu/xstate.c         |  4 ++--
 arch/x86/kernel/process.c            | 20 ++++++++++++++-----
 arch/x86/kernel/reboot.c             |  2 +-
 arch/x86/kvm/vmx.c                   | 13 ++++++++++--
 arch/x86/mm/init.c                   |  6 +++++-
 13 files changed, 100 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6699fc441644..637cbda77eda 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -30,9 +30,9 @@ static inline void load_mm_cr4(struct mm_struct *mm)
 {
 	if (static_key_false(&rdpmc_always_available) ||
 	    atomic_read(&mm->context.perf_rdpmc_allowed))
-		cr4_set_bits(X86_CR4_PCE);
+		cr4_set_bits_irqs_off(X86_CR4_PCE);
 	else
-		cr4_clear_bits(X86_CR4_PCE);
+		cr4_clear_bits_irqs_off(X86_CR4_PCE);
 }
 #else
 static inline void load_mm_cr4(struct mm_struct *mm) {}
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 509046cfa5ce..8ce9d7b10793 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -173,40 +173,46 @@ static inline void cr4_init_shadow(void)
 	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
 }
 
+static inline void update_cr4(unsigned long cr4)
+{
+#ifdef CONFIG_LOCKDEP
+	WARN_ON(!irqs_disabled());
+#endif
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+	__write_cr4(cr4);
+}
+
 /* Set in this cpu's CR4. */
-static inline void cr4_set_bits(unsigned long mask)
+static inline void cr4_set_bits_irqs_off(unsigned long mask)
 {
 	unsigned long cr4;
 
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	if ((cr4 | mask) != cr4) {
 		cr4 |= mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
+		update_cr4(cr4);
 	}
 }
 
 /* Clear in this cpu's CR4. */
-static inline void cr4_clear_bits(unsigned long mask)
+static inline void cr4_clear_bits_irqs_off(unsigned long mask)
 {
 	unsigned long cr4;
 
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	if ((cr4 & ~mask) != cr4) {
 		cr4 &= ~mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
+		update_cr4(cr4);
 	}
 }
 
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqs_off(unsigned long mask)
 {
 	unsigned long cr4;
 
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	cr4 ^= mask;
-	this_cpu_write(cpu_tlbstate.cr4, cr4);
-	__write_cr4(cr4);
+	update_cr4(cr4);
 }
 
 /* Read the CR4 shadow. */
@@ -226,10 +232,15 @@ extern u32 *trampoline_cr4_features;
 
 static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 {
+	unsigned long flags;
+
 	mmu_cr4_features |= mask;
 	if (trampoline_cr4_features)
 		*trampoline_cr4_features = mmu_cr4_features;
-	cr4_set_bits(mask);
+
+	local_irq_save(flags);
+	cr4_set_bits_irqs_off(mask);
+	local_irq_restore(flags);
 }
 
 extern void initialize_tlbstate_and_flush(void);
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 0116b2ee9e64..b403ca417b7d 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -41,7 +41,7 @@ static inline int cpu_has_vmx(void)
 static inline void cpu_vmxoff(void)
 {
 	asm volatile (ASM_VMX_VMXOFF : : : "cc");
-	cr4_clear_bits(X86_CR4_VMXE);
+	cr4_clear_bits_irqs_off(X86_CR4_VMXE);
 }
 
 static inline int cpu_vmx_enabled(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176bae7fd8..f31890787d01 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -302,8 +302,14 @@ __setup("nosmep", setup_disable_smep);
 
 static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_SMEP))
-		cr4_set_bits(X86_CR4_SMEP);
+	unsigned long flags;
+
+	if (!cpu_has(c, X86_FEATURE_SMEP))
+		return;
+
+	local_irq_save(flags);
+	cr4_set_bits_irqs_off(X86_CR4_SMEP);
+	local_irq_restore(flags);
 }
 
 static __init int setup_disable_smap(char *arg)
@@ -320,13 +326,16 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	/* This should have been cleared long ago */
 	BUG_ON(eflags & X86_EFLAGS_AC);
 
-	if (cpu_has(c, X86_FEATURE_SMAP)) {
+	if (!cpu_has(c, X86_FEATURE_SMAP))
+		return;
+
+	local_irq_save(eflags);
 #ifdef CONFIG_X86_SMAP
-		cr4_set_bits(X86_CR4_SMAP);
+	cr4_set_bits_irqs_off(X86_CR4_SMAP);
 #else
-		cr4_clear_bits(X86_CR4_SMAP);
+	cr4_clear_bits_irqs_off(X86_CR4_SMAP);
 #endif
-	}
+	local_irq_restore(eflags);
 }
 
 /*
@@ -336,6 +345,8 @@ static bool pku_disabled;
 
 static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 {
+	unsigned long flags;
+
 	/* check the boot processor, plus compile options for PKU: */
 	if (!cpu_feature_enabled(X86_FEATURE_PKU))
 		return;
@@ -345,7 +356,10 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 	if (pku_disabled)
 		return;
 
-	cr4_set_bits(X86_CR4_PKE);
+	local_irq_save(flags);
+	cr4_set_bits_irqs_off(X86_CR4_PKE);
+	local_irq_restore(flags);
+
 	/*
 	 * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
 	 * cpuid bit to be set.  We need to ensure that we
@@ -1147,7 +1161,10 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
 
-	/* Set up SMEP/SMAP */
+	/*
+	 * Set up SMEP/SMAP. Disable interrupts to prevent triggering a warning
+	 * as CR4 changes must be done with disabled interrupts.
+	 */
 	setup_smep(c);
 	setup_smap(c);
 
@@ -1520,7 +1537,7 @@ void cpu_init(void)
 
 	pr_debug("Initializing CPU#%d\n", cpu);
 
-	cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+	cr4_clear_bits_irqs_off(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
@@ -1613,7 +1630,8 @@ void cpu_init(void)
 	if (cpu_feature_enabled(X86_FEATURE_VME) ||
 	    boot_cpu_has(X86_FEATURE_TSC) ||
 	    boot_cpu_has(X86_FEATURE_DE))
-		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+		cr4_clear_bits_irqs_off(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|
+					X86_CR4_DE);
 
 	load_current_idt();
 	switch_to_new_gdt(cpu);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3b413065c613..1aac196eb145 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1508,6 +1508,7 @@ static void __mcheck_cpu_init_generic(void)
 {
 	enum mcp_flags m_fl = 0;
 	mce_banks_t all_banks;
+	unsigned long flags;
 	u64 cap;
 
 	if (!mca_cfg.bootlog)
@@ -1519,7 +1520,9 @@ static void __mcheck_cpu_init_generic(void)
 	bitmap_fill(all_banks, MAX_NR_BANKS);
 	machine_check_poll(MCP_UC | m_fl, &all_banks);
 
-	cr4_set_bits(X86_CR4_MCE);
+	local_irq_save(flags);
+	cr4_set_bits_irqs_off(X86_CR4_MCE);
+	local_irq_restore(flags);
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	if (cap & MCG_CTL_P)
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 5cddf831720f..e4bb9573cfe5 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -43,6 +43,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
 /* Set up machine check reporting for processors with Intel style MCE: */
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
+	unsigned long flags;
 	u32 l, h;
 
 	/* Default P5 to off as its often misconnected: */
@@ -63,7 +64,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 	pr_info("Intel old style machine check architecture supported.\n");
 
 	/* Enable MCE: */
-	cr4_set_bits(X86_CR4_MCE);
+	local_irq_save(flags);
+	cr4_set_bits_irqs_off(X86_CR4_MCE);
+	local_irq_restore(flags);
+
 	pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
 		smp_processor_id());
 }
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 3b45b270a865..72213d75c865 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -27,6 +27,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
 /* Set up machine check reporting on the Winchip C6 series */
 void winchip_mcheck_init(struct cpuinfo_x86 *c)
 {
+	unsigned long flags;
 	u32 lo, hi;
 
 	machine_check_vector = winchip_machine_check;
@@ -38,7 +39,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
 	lo &= ~(1<<4);	/* Enable MCE */
 	wrmsr(MSR_IDT_FCR1, lo, hi);
 
-	cr4_set_bits(X86_CR4_MCE);
+	local_irq_save(flags);
+	cr4_set_bits_irqs_off(X86_CR4_MCE);
+	local_irq_restore(flags);
 
 	pr_info("Winchip machine check reporting enabled on CPU#0.\n");
 }
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7affb7e3d9a5..db57b217e123 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -23,7 +23,7 @@ static void fpu__init_cpu_generic(void)
 	if (boot_cpu_has(X86_FEATURE_XMM))
 		cr4_mask |= X86_CR4_OSXMMEXCPT;
 	if (cr4_mask)
-		cr4_set_bits(cr4_mask);
+		cr4_set_bits_irqs_off(cr4_mask);
 
 	cr0 = read_cr0();
 	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476c9022..9d3c7a1a4ce5 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -237,7 +237,7 @@ void fpu__init_cpu_xstate(void)
 
 	xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
 
-	cr4_set_bits(X86_CR4_OSXSAVE);
+	cr4_set_bits_irqs_off(X86_CR4_OSXSAVE);
 	xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
 }
 
@@ -713,7 +713,7 @@ static int init_xstate_size(void)
 static void fpu__init_disable_system_xstate(void)
 {
 	xfeatures_mask = 0;
-	cr4_clear_bits(X86_CR4_OSXSAVE);
+	cr4_clear_bits_irqs_off(X86_CR4_OSXSAVE);
 	fpu__xstate_clear_all_cpu_caps();
 }
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c67685337c5a..412265fe14df 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -128,25 +128,35 @@ void flush_thread(void)
 
 void disable_TSC(void)
 {
+	unsigned long flags;
+
 	preempt_disable();
-	if (!test_and_set_thread_flag(TIF_NOTSC))
+	if (!test_and_set_thread_flag(TIF_NOTSC)) {
 		/*
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOTSC in the current running context.
 		 */
-		cr4_set_bits(X86_CR4_TSD);
+		local_irq_save(flags);
+		cr4_set_bits_irqs_off(X86_CR4_TSD);
+		local_irq_restore(flags);
+	}
 	preempt_enable();
 }
 
 static void enable_TSC(void)
 {
+	unsigned long flags;
+
 	preempt_disable();
-	if (test_and_clear_thread_flag(TIF_NOTSC))
+	if (test_and_clear_thread_flag(TIF_NOTSC)) {
 		/*
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOTSC in the current running context.
 		 */
-		cr4_clear_bits(X86_CR4_TSD);
+		local_irq_save(flags);
+		cr4_clear_bits_irqs_off(X86_CR4_TSD);
+		local_irq_restore(flags);
+	}
 	preempt_enable();
 }
 
@@ -293,7 +303,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	}
 
 	if ((tifp ^ tifn) & _TIF_NOTSC)
-		cr4_toggle_bits(X86_CR4_TSD);
+		cr4_toggle_bits_irqs_off(X86_CR4_TSD);
 
 	if ((tifp ^ tifn) & _TIF_NOCPUID)
 		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2126b9d27c34..86ad70c02607 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -109,7 +109,7 @@ void __noreturn machine_real_restart(unsigned int type)
 
 	/* Exiting long mode will fail if CR4.PCIDE is set. */
 	if (static_cpu_has(X86_FEATURE_PCID))
-		cr4_clear_bits(X86_CR4_PCIDE);
+		cr4_clear_bits_irqs_off(X86_CR4_PCIDE);
 #endif
 
 	/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a6f4f095f8f4..a0b387dfbf5c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3500,7 +3500,12 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
-	cr4_set_bits(X86_CR4_VMXE);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cr4_set_bits_irqs_off(X86_CR4_VMXE);
+	local_irq_restore(flags);
+
 	intel_pt_handle_vmx(1);
 
 	asm volatile (ASM_VMX_VMXON_RAX
@@ -3565,10 +3570,14 @@ static void vmclear_local_loaded_vmcss(void)
  */
 static void kvm_cpu_vmxoff(void)
 {
+	unsigned long flags;
+
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 
 	intel_pt_handle_vmx(0);
-	cr4_clear_bits(X86_CR4_VMXE);
+	local_irq_save(flags);
+	cr4_clear_bits_irqs_off(X86_CR4_VMXE);
+	local_irq_restore(flags);
 }
 
 static void hardware_disable(void)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index af5c1ed21d43..ddd248acab8e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -199,6 +199,8 @@ static void setup_pcid(void)
 #ifdef CONFIG_X86_64
 	if (boot_cpu_has(X86_FEATURE_PCID)) {
 		if (boot_cpu_has(X86_FEATURE_PGE)) {
+			unsigned long flags;
+
 			/*
 			 * This can't be cr4_set_bits_and_update_boot() --
 			 * the trampoline code can't handle CR4.PCIDE and
@@ -210,7 +212,9 @@ static void setup_pcid(void)
 			 * Instead, we brute-force it and set CR4.PCIDE
 			 * manually in start_secondary().
 			 */
-			cr4_set_bits(X86_CR4_PCIDE);
+			local_irq_save(flags);
+			cr4_set_bits_irqs_off(X86_CR4_PCIDE);
+			local_irq_restore(flags);
 		} else {
 			/*
 			 * flush_tlb_all(), as currently implemented, won't


* Re: [PATCH] x86: disable IRQs before changing CR4
  2017-11-17 18:19 ` [PATCH] " Nadav Amit
@ 2017-11-20 11:55 ` Thomas Gleixner
From: Thomas Gleixner @ 2017-11-20 11:55 UTC (permalink / raw)
  To: Nadav Amit
  Cc: linux-edac, kvm, nadav.amit, Andy Lutomirski, Ingo Molnar,
	H. Peter Anvin, x86, Tony Luck, Borislav Petkov, Paolo Bonzini,
	Radim Krčmář

On Fri, 17 Nov 2017, Nadav Amit wrote:

> CR4 changes need to be performed while IRQs are disabled in order to
> update the CR4 shadow and the actual register atomically. In fact,
> disabled IRQs are needed regardless of CR4 shadowing, since CR4 updates
> are performed in a read-modify-write manner.

I have a hard time figuring out why that RMW protection needs
interrupts disabled. Which call site happens to be in interrupt
context?

If there is none, then the proper protection is to disable preemption,
which can be done without all that churn.

Thanks,

	tglx


* Re: [PATCH] x86: disable IRQs before changing CR4
  2017-11-20 11:55 ` [PATCH] " Thomas Gleixner
@ 2017-11-20 14:34 ` Andy Lutomirski
From: Andy Lutomirski @ 2017-11-20 14:34 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Nadav Amit, linux-edac, kvm list, Nadav Amit, Andy Lutomirski,
	Ingo Molnar, H. Peter Anvin, X86 ML, Tony Luck, Borislav Petkov,
	Paolo Bonzini, Radim Krčmář

On Mon, Nov 20, 2017 at 3:55 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Fri, 17 Nov 2017, Nadav Amit wrote:
>
>> CR4 changes need to be performed while IRQs are disabled in order to
>> update the CR4 shadow and the actual register atomically. In fact,
>> disabled IRQs are needed regardless of CR4 shadowing, since CR4 updates
>> are performed in a read-modify-write manner.
>
> I have a hard time figuring out why that RMW protection needs
> interrupts disabled. Which call site happens to be in interrupt
> context?

It's the flush_tlb_all() stuff.  We use the cr4 accessors in the IPI handler.

>
> If there is none, then the proper protection is to disable preemption,
> which can be done without all that churn.
>
> Thanks,
>
>         tglx
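
[ For context, the CR4 use in the TLB-flush IPI path that Andy refers
  to looks roughly like this; a schematic sketch of the 4.14-era
  helper, details may differ: ]

  static inline void __native_flush_tlb_global_irq_disabled(void)
  {
          unsigned long cr4;

          cr4 = this_cpu_read(cpu_tlbstate.cr4);
          /* clear PGE, flushing all TLB entries, including global ones */
          native_write_cr4(cr4 & ~X86_CR4_PGE);
          /* restore PGE; this flushes the TLB again */
          native_write_cr4(cr4);
  }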


* Re: [PATCH] x86: disable IRQs before changing CR4
  2017-11-20 14:34 ` [PATCH] " Andy Lutomirski
@ 2017-11-20 18:05 ` Thomas Gleixner
From: Thomas Gleixner @ 2017-11-20 18:05 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Nadav Amit, linux-edac, kvm list, Nadav Amit, Ingo Molnar,
	H. Peter Anvin, X86 ML, Tony Luck, Borislav Petkov,
	Paolo Bonzini, Radim Krčmář,
	lkml

On Mon, 20 Nov 2017, Andy Lutomirski wrote:

> On Mon, Nov 20, 2017 at 3:55 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > On Fri, 17 Nov 2017, Nadav Amit wrote:
> >
> >> CR4 changes need to be performed while IRQs are disabled in order to
> >> update the CR4 shadow and the actual register atomically. In fact,
> >> disabled IRQs are needed regardless of CR4 shadowing, since CR4 updates
> >> are performed in a read-modify-write manner.
> >
> > I have a hard time figuring out why that RMW protection needs
> > interrupts disabled. Which call site happens to be in interrupt
> > context?
> 
> It's the flush_tlb_all() stuff.  We use the cr4 accessors in the IPI handler.

Fair enough. Then this wants to be spelled out in the change log.

Thanks,

	tglx


* Re: [PATCH] x86: disable IRQs before changing CR4
  2017-11-17 18:19 ` [PATCH] " Nadav Amit
@ 2017-11-20 18:16 ` Andy Lutomirski
From: Andy Lutomirski @ 2017-11-20 18:16 UTC (permalink / raw)
  To: Nadav Amit
  Cc: linux-edac, kvm list, Nadav Amit, Andy Lutomirski,
	Thomas Gleixner, Ingo Molnar, H. Peter Anvin, X86 ML, Tony Luck,
	Borislav Petkov, Paolo Bonzini, Radim Krčmář

On Fri, Nov 17, 2017 at 10:19 AM, Nadav Amit <namit@vmware.com> wrote:

> +static inline void update_cr4(unsigned long cr4)
> +{
> +#ifdef CONFIG_LOCKDEP
> +       WARN_ON(!irqs_disabled());
> +#endif
> +       this_cpu_write(cpu_tlbstate.cr4, cr4);
> +       __write_cr4(cr4);
> +}

Let's call this __cr4_set() instead of update_cr4().  The __ is to
remind people that it's not really for use and starting with cr4_ is
consistent with the rest of the functions.

Also, can you split this whole thing into two patches?  The
introduction of the separate update function is a great cleanup, but I
think it's orthogonal.
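
[ Applied to the helper from the patch, the suggested rename would look
  roughly like this; a sketch of the proposal, not code from either
  mail: ]

  static inline void __cr4_set(unsigned long cr4)
  {
  #ifdef CONFIG_LOCKDEP
          WARN_ON(!irqs_disabled());
  #endif
          this_cpu_write(cpu_tlbstate.cr4, cr4);
          __write_cr4(cr4);
  }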


* Re: [PATCH] x86: disable IRQs before changing CR4
  2017-11-20 18:16 ` [PATCH] " Andy Lutomirski
@ 2017-11-20 19:51 ` Nadav Amit
From: Nadav Amit @ 2017-11-20 19:51 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-edac, kvm list, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, X86 ML, Tony Luck, Borislav Petkov,
	Paolo Bonzini, Radim Krčmář

Andy Lutomirski <luto@kernel.org> wrote:

> On Fri, Nov 17, 2017 at 10:19 AM, Nadav Amit <namit@vmware.com> wrote:
> 
>> +static inline void update_cr4(unsigned long cr4)
>> +{
>> +#ifdef CONFIG_LOCKDEP
>> +       WARN_ON(!irqs_disabled());
>> +#endif
>> +       this_cpu_write(cpu_tlbstate.cr4, cr4);
>> +       __write_cr4(cr4);
>> +}
> 
> Let's call this __cr4_set() instead of update_cr4().  The __ is to
> remind people that it's not really for use and starting with cr4_ is
> consistent with the rest of the functions.
> 
> Also, can you split this whole thing into two patches?  The
> introduction of the separate update function is a great cleanup, but I
> think it's orthogonal.

Sure. Will do. Thanks for reviewing it.

Just one small clarification: I do not see a correctness issue right now
if __native_flush_tlb_global_irq_disabled() is the only function that
modifies CR4 in an interrupt handler. Yet, the current scheme seems
error prone.

Regards,
Nadav


* Re: [PATCH] x86: disable IRQs before changing CR4
  2017-11-20 19:51 ` [PATCH] " Nadav Amit
@ 2017-11-21  1:08 ` Andy Lutomirski
From: Andy Lutomirski @ 2017-11-21  1:08 UTC (permalink / raw)
  To: Nadav Amit
  Cc: Andy Lutomirski, linux-edac, kvm list, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin, X86 ML, Tony Luck, Borislav Petkov,
	Paolo Bonzini, Radim Krčmář

On Mon, Nov 20, 2017 at 11:51 AM, Nadav Amit <namit@vmware.com> wrote:
> Andy Lutomirski <luto@kernel.org> wrote:
>
>> On Fri, Nov 17, 2017 at 10:19 AM, Nadav Amit <namit@vmware.com> wrote:
>>
>>> +static inline void update_cr4(unsigned long cr4)
>>> +{
>>> +#ifdef CONFIG_LOCKDEP
>>> +       WARN_ON(!irqs_disabled());
>>> +#endif
>>> +       this_cpu_write(cpu_tlbstate.cr4, cr4);
>>> +       __write_cr4(cr4);
>>> +}
>>
>> Let's call this __cr4_set() instead of update_cr4().  The __ is to
>> remind people that it's not really for use and starting with cr4_ is
>> consistent with the rest of the functions.
>>
>> Also, can you split this whole thing into two patches?  The
>> introduction of the separate update function is a great cleanup, but I
>> think it's orthogonal.
>
> Sure. Will do. Thanks for reviewing it.
>
> Just one small clarification: I do not see a correctness issue right now
> if __native_flush_tlb_global_irq_disabled() is the only function that
> modifies CR4 in an interrupt handler. Yet, the current scheme seems
> error prone.

If we ever do cr4_whatever() with preempt *on*, then we could be
preempted and we're screwed, probably exploitably.  I haven't looked
carefully enough to decide whether I think that the IPI could cause a
real problem.
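
[ The preempt-on hazard, schematically; a hypothetical interleaving,
  not code from the patch: ]

  /* task runs a cr4_*() helper on CPU 0 with preemption enabled */
  cr4 = this_cpu_read(cpu_tlbstate.cr4); /* CPU 0's shadow */
  cr4 |= X86_CR4_TSD;
  /* <-- preempted here and migrated to CPU 1 */
  this_cpu_write(cpu_tlbstate.cr4, cr4); /* clobbers CPU 1's shadow */
  __write_cr4(cr4);                      /* writes CPU 0's value into CPU 1's CR4 */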
