* x86: CR4 update when IRQs are enabled
@ 2017-11-08 21:52 Nadav Amit
  2017-11-09  6:29 ` Nadav Amit
  0 siblings, 1 reply; 6+ messages in thread
From: Nadav Amit @ 2017-11-08 21:52 UTC (permalink / raw)
  To: LKML, Andy Lutomirski

I am puzzled by the comment in tlb_state.cr4, which says:

        /*
         * Access to this CR4 shadow and to H/W CR4 is protected by
         * disabling interrupts when modifying either one.
         */
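
(For reference, the shadow in question is the per-CPU cpu_tlbstate.cr4 field
in arch/x86/include/asm/tlbflush.h -- roughly:)

	struct tlb_state {
		/* ... other fields omitted ... */

		unsigned long cr4;	/* shadow of the hardware CR4 register */
	};
	DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);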

This does not seem to be true: adding a warning to CR4 writes when
!irqs_disabled() immediately fires in identify_cpu() and
__mcheck_cpu_init_generic(). While these two are called during boot, I think
there are other CR4 changes that run with IRQs enabled, for example
PR_SET_TSC.
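
(The check that fires is essentially just this, dropped into the common CR4
write path -- a sketch, not the exact hunk I tested:)

	WARN_ON_ONCE(!irqs_disabled());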

So my question(s): Is the comment correct? Is the current behavior correct?

Thanks,
Nadav


* Re: x86: CR4 update when IRQs are enabled
  2017-11-08 21:52 x86: CR4 update when IRQs are enabled Nadav Amit
@ 2017-11-09  6:29 ` Nadav Amit
  2017-11-15 23:31   ` Nadav Amit
  0 siblings, 1 reply; 6+ messages in thread
From: Nadav Amit @ 2017-11-09  6:29 UTC (permalink / raw)
  To: Dave Hansen, H. Peter Anvin, Thomas Gleixner, Borislav Petkov,
	the arch/x86 maintainers, Andy Lutomirski
  Cc: LKML

CC’ing more people, and adding a patch to clarify...

Nadav Amit <nadav.amit@gmail.com> wrote:

> I am puzzled by the comment in tlb_state.cr4, which says:
> 
>        /*
>         * Access to this CR4 shadow and to H/W CR4 is protected by
>         * disabling interrupts when modifying either one.
>         */
> 
> This does not seem to be true: adding a warning to CR4 writes when
> !irqs_disabled() immediately fires in identify_cpu() and
> __mcheck_cpu_init_generic(). While these two are called during boot, I think
> there are other CR4 changes that run with IRQs enabled, for example
> PR_SET_TSC.
> 
> So my question(s): Is the comment correct? Is the current behavior correct?

So here is what I have in mind. I am not sure whether CONFIG_DEBUG_PREEMPT is
the right #ifdef. Let me know what you think.
 
-- >8 --

Subject: [PATCH] x86: disable IRQs before changing CR4

CR4 changes need to be performed while IRQs are disabled in order to
update the CR4 shadow and the actual register atomically.

Signed-off-by: Nadav Amit <namit@vmware.com>
---
 arch/x86/include/asm/tlbflush.h      | 18 ++++++++++++------
 arch/x86/kernel/cpu/common.c         | 13 ++++++++++++-
 arch/x86/kernel/cpu/mcheck/mce.c     |  3 +++
 arch/x86/kernel/cpu/mcheck/p5.c      |  4 ++++
 arch/x86/kernel/cpu/mcheck/winchip.c |  3 +++
 arch/x86/kernel/process.c            | 14 ++++++++++++--
 6 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50ea3482e1d1..bc70dd1cc7c6 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -89,6 +89,15 @@ static inline void cr4_init_shadow(void)
 	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
 }
 
+static inline void update_cr4(unsigned long cr4)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+	WARN_ON_ONCE(!irqs_disabled());
+#endif
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+	__write_cr4(cr4);
+}
+
 /* Set in this cpu's CR4. */
 static inline void cr4_set_bits(unsigned long mask)
 {
@@ -97,8 +106,7 @@ static inline void cr4_set_bits(unsigned long mask)
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	if ((cr4 | mask) != cr4) {
 		cr4 |= mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
+		update_cr4(cr4);
 	}
 }
 
@@ -110,8 +118,7 @@ static inline void cr4_clear_bits(unsigned long mask)
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	if ((cr4 & ~mask) != cr4) {
 		cr4 &= ~mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
+		update_cr4(cr4);
 	}
 }
 
@@ -121,8 +128,7 @@ static inline void cr4_toggle_bits(unsigned long mask)
 
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	cr4 ^= mask;
-	this_cpu_write(cpu_tlbstate.cr4, cr4);
-	__write_cr4(cr4);
+	update_cr4(cr4);
 }
 
 /* Read the CR4 shadow. */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c8b39870f33e..82e6b41fd5e9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -318,6 +318,8 @@ static bool pku_disabled;
 
 static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 {
+	unsigned long flags;
+
 	/* check the boot processor, plus compile options for PKU: */
 	if (!cpu_feature_enabled(X86_FEATURE_PKU))
 		return;
@@ -327,7 +329,10 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 	if (pku_disabled)
 		return;
 
+	local_irq_save(flags);
 	cr4_set_bits(X86_CR4_PKE);
+	local_irq_restore(flags);
+
 	/*
 	 * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
 	 * cpuid bit to be set.  We need to ensure that we
@@ -1069,6 +1074,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
  */
 static void identify_cpu(struct cpuinfo_x86 *c)
 {
+	unsigned long flags;
 	int i;
 
 	c->loops_per_jiffy = loops_per_jiffy;
@@ -1121,9 +1127,14 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
 
-	/* Set up SMEP/SMAP */
+	/*
+	 * Set up SMEP/SMAP. Disable interrupts to avoid triggering a warning,
+	 * as CR4 changes must be done with interrupts disabled.
+	 */
+	local_irq_save(flags);
 	setup_smep(c);
 	setup_smap(c);
+	local_irq_restore(flags);
 
 	/*
 	 * The vendor-specific functions might have changed features.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3b413065c613..497c07e33c25 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1508,6 +1508,7 @@ static void __mcheck_cpu_init_generic(void)
 {
 	enum mcp_flags m_fl = 0;
 	mce_banks_t all_banks;
+	unsigned long flags;
 	u64 cap;
 
 	if (!mca_cfg.bootlog)
@@ -1519,7 +1520,9 @@ static void __mcheck_cpu_init_generic(void)
 	bitmap_fill(all_banks, MAX_NR_BANKS);
 	machine_check_poll(MCP_UC | m_fl, &all_banks);
 
+	local_irq_save(flags);
 	cr4_set_bits(X86_CR4_MCE);
+	local_irq_restore(flags);
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	if (cap & MCG_CTL_P)
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 2a0717bf8033..d5d4963415e9 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -42,6 +42,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
 /* Set up machine check reporting for processors with Intel style MCE: */
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
+	unsigned long flags;
 	u32 l, h;
 
 	/* Default P5 to off as its often misconnected: */
@@ -62,7 +63,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 	pr_info("Intel old style machine check architecture supported.\n");
 
 	/* Enable MCE: */
+	local_irq_save(flags);
 	cr4_set_bits(X86_CR4_MCE);
+	local_irq_restore(flags);
+
 	pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
 		smp_processor_id());
 }
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index c6a722e1d011..6dd985e3849d 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -26,6 +26,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
 /* Set up machine check reporting on the Winchip C6 series */
 void winchip_mcheck_init(struct cpuinfo_x86 *c)
 {
+	unsigned long flags;
 	u32 lo, hi;
 
 	machine_check_vector = winchip_machine_check;
@@ -37,7 +38,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
 	lo &= ~(1<<4);	/* Enable MCE */
 	wrmsr(MSR_IDT_FCR1, lo, hi);
 
+	local_irq_save(flags);
 	cr4_set_bits(X86_CR4_MCE);
+	local_irq_restore(flags);
 
 	pr_info("Winchip machine check reporting enabled on CPU#0.\n");
 }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca198080ea9..09e44e784745 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -127,25 +127,35 @@ void flush_thread(void)
 
 void disable_TSC(void)
 {
+	unsigned long flags;
+
 	preempt_disable();
-	if (!test_and_set_thread_flag(TIF_NOTSC))
+	if (!test_and_set_thread_flag(TIF_NOTSC)) {
 		/*
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOTSC in the current running context.
 		 */
+		local_irq_save(flags);
 		cr4_set_bits(X86_CR4_TSD);
+		local_irq_restore(flags);
+	}
 	preempt_enable();
 }
 
 static void enable_TSC(void)
 {
+	unsigned long flags;
+
 	preempt_disable();
-	if (test_and_clear_thread_flag(TIF_NOTSC))
+	if (test_and_clear_thread_flag(TIF_NOTSC)) {
 		/*
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOTSC in the current running context.
 		 */
+		local_irq_save(flags);
 		cr4_clear_bits(X86_CR4_TSD);
+		local_irq_restore(flags);
+	}
 	preempt_enable();
 }


* Re: x86: CR4 update when IRQs are enabled
  2017-11-09  6:29 ` Nadav Amit
@ 2017-11-15 23:31   ` Nadav Amit
  2017-11-15 23:48     ` hpa
  0 siblings, 1 reply; 6+ messages in thread
From: Nadav Amit @ 2017-11-15 23:31 UTC (permalink / raw)
  To: Dave Hansen, H. Peter Anvin, Thomas Gleixner, Borislav Petkov,
	the arch/x86 maintainers, Andy Lutomirski
  Cc: LKML

Ping?

Nadav Amit <nadav.amit@gmail.com> wrote:

> CC’ing more people, and adding a patch to clarify...
> 
> Nadav Amit <nadav.amit@gmail.com> wrote:
> 
>> I am puzzled by the comment in tlb_state.cr4, which says:
>> 
>>       /*
>>        * Access to this CR4 shadow and to H/W CR4 is protected by
>>        * disabling interrupts when modifying either one.
>>        */
>> 
>> This does not seem to be true: adding a warning to CR4 writes when
>> !irqs_disabled() immediately fires in identify_cpu() and
>> __mcheck_cpu_init_generic(). While these two are called during boot, I think
>> there are other CR4 changes that run with IRQs enabled, for example
>> PR_SET_TSC.
>> 
>> So my question(s): Is the comment correct? Is the current behavior correct?
> 
> So here is what I have in mind. I am not sure whether CONFIG_DEBUG_PREEMPT is
> the right #ifdef. Let me know what you think.
> 
> [...]


* Re: x86: CR4 update when IRQs are enabled
  2017-11-15 23:31   ` Nadav Amit
@ 2017-11-15 23:48     ` hpa
  2017-11-16  0:34       ` Nadav Amit
  0 siblings, 1 reply; 6+ messages in thread
From: hpa @ 2017-11-15 23:48 UTC (permalink / raw)
  To: Nadav Amit, Dave Hansen, Thomas Gleixner, Borislav Petkov,
	the arch/x86 maintainers, Andy Lutomirski
  Cc: LKML

On November 15, 2017 3:31:50 PM PST, Nadav Amit <nadav.amit@gmail.com> wrote:
>Ping?
>
>[...]
This is wrong on at least two levels: first of all, this should not be wrapped around the abstracted operations but put inside them if relevant. Second, I suspect that this is not a requirement at all, but rather that as long as the hardware register is written second, we should always be safe.
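
(Concretely, the first point would mean moving the IRQ handling into the
helpers themselves instead of sprinkling it at every call site -- a rough
sketch based on the cr4_set_bits() body from the patch, not a tested change:)

	static inline void cr4_set_bits(unsigned long mask)
	{
		unsigned long cr4, flags;

		local_irq_save(flags);
		cr4 = this_cpu_read(cpu_tlbstate.cr4);
		if ((cr4 | mask) != cr4) {
			cr4 |= mask;
			this_cpu_write(cpu_tlbstate.cr4, cr4);
			__write_cr4(cr4);
		}
		local_irq_restore(flags);
	}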
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


* Re: x86: CR4 update when IRQs are enabled
  2017-11-15 23:48     ` hpa
@ 2017-11-16  0:34       ` Nadav Amit
  2017-11-16  3:44         ` Andy Lutomirski
  0 siblings, 1 reply; 6+ messages in thread
From: Nadav Amit @ 2017-11-16  0:34 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Dave Hansen, Thomas Gleixner, Borislav Petkov,
	the arch/x86 maintainers, Andy Lutomirski, LKML

hpa@zytor.com wrote:

> On November 15, 2017 3:31:50 PM PST, Nadav Amit <nadav.amit@gmail.com> wrote:
>> [...]
> 
> This is wrong on at least two levels: first of all, this should not be wrapped around the abstracted operations but put inside them if relevant. Second, I suspect that this is not a requirement at all, but rather that as long as the hardware register is written second, we should always be safe.

Thanks for your reply.

Can you please explain your suspicion? Here is a simple execution that may
fail (assume cr4 is initially zero):

  CPU0
  ====
  cr4_set_bits(1)
    => cpu_tlbstate.cr4 = 1
         => interrupt
            cr4_set_bits(2)
            cpu_tlbstate.cr4 = 3
            __write_cr4(3)
    => __write_cr4(1)		[ and should have been 3 ]

Am I missing anything?

Now, it may be only a theoretical issue for the time being, since I did not
find any change of CR4 bits that happens in an interrupt handler.

Anyhow, if you want me to submit a fix, please let me know what other levels
of wrongness you had in mind.

Regards,
Nadav


* Re: x86: CR4 update when IRQs are enabled
  2017-11-16  0:34       ` Nadav Amit
@ 2017-11-16  3:44         ` Andy Lutomirski
  0 siblings, 0 replies; 6+ messages in thread
From: Andy Lutomirski @ 2017-11-16  3:44 UTC (permalink / raw)
  To: Nadav Amit
  Cc: H. Peter Anvin, Dave Hansen, Thomas Gleixner, Borislav Petkov,
	the arch/x86 maintainers, Andy Lutomirski, LKML

I think Nadav is right here.  IMO the right fix is to rename the
functions to cr4_set_bits_irqs_off() etc., add a warning (if lockdep is
on), and fix the callers.
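
Roughly what I have in mind -- a sketch only; the exact names and the
lockdep-conditional check are illustrative, not real code:

	/* For callers that already run with IRQs disabled: */
	static inline void cr4_set_bits_irqs_off(unsigned long mask)
	{
		unsigned long cr4;

		/* Complain only when lock debugging is on (sketch). */
		WARN_ON_ONCE(IS_ENABLED(CONFIG_LOCKDEP) && !irqs_disabled());

		cr4 = this_cpu_read(cpu_tlbstate.cr4);
		if ((cr4 | mask) != cr4) {
			cr4 |= mask;
			this_cpu_write(cpu_tlbstate.cr4, cr4);
			__write_cr4(cr4);
		}
	}

	/* Everyone else goes through an IRQ-safe wrapper: */
	static inline void cr4_set_bits(unsigned long mask)
	{
		unsigned long flags;

		local_irq_save(flags);
		cr4_set_bits_irqs_off(mask);
		local_irq_restore(flags);
	}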

On Wed, Nov 15, 2017 at 4:34 PM, Nadav Amit <nadav.amit@gmail.com> wrote:
> hpa@zytor.com wrote:
>
>> [...]
>>
>> This is wrong on at least two levels: first of all, this should not be wrapped around the abstracted operations but put inside them if relevant. Second, I suspect that this is not a requirement at all, but rather that as long as the hardware register is written second, we should always be safe.
>
> Thanks for your reply.
>
> Can you please explain your suspicion? Here is a simple execution that may
> fail (assume cr4 is initially zero):
>
>   CPU0
>   ====
>   cr4_set_bits(1)
>     => cpu_tlbstate.cr4 = 1
>          => interrupt
>             cr4_set_bits(2)
>             cpu_tlbstate.cr4 = 3
>             __write_cr4(3)
>     => __write_cr4(1)           [ and should have been 3 ]
>
> Am I missing anything?
>
> Now, it may be only a theoretical issue for the time being, since I did not
> find any change of CR4 bits that happens in an interrupt handler.
>
> Anyhow, if you want me to submit a fix, please let me know what other levels
> of wrongness you had in mind.
>
> Regards,
> Nadav
>

