All of lore.kernel.org
 help / color / mirror / Atom feed
* x86: FPU cleanups
@ 2010-08-28 16:04 Brian Gerst
  2010-08-28 16:04 ` [PATCH 01/11] x86: Use correct type for %cr4 Brian Gerst
                   ` (10 more replies)
  0 siblings, 11 replies; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

[PATCH 01/11] x86: Use correct type for %cr4
[PATCH 02/11] x86: Merge fpu_init()
[PATCH 03/11] x86: Merge tolerant_fwait()
[PATCH 04/11] x86: Merge __save_init_fpu()
[PATCH 05/11] x86-64: Disable preemption when using TS_USEDFPU
[PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr()
[PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor
[PATCH 08/11] x86-32: Remove math_emulate stub
[PATCH 09/11] x86: Merge fpu_save_init()
[PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code.
[PATCH 11/11] x86: Remove PSHUFB_XMM5_* macros

 arch/x86/include/asm/i387.h      |  188 ++++++++++----------------------------
 arch/x86/include/asm/processor.h |    4 +-
 arch/x86/kernel/cpu/common.c     |    7 --
 arch/x86/kernel/i387.c           |   49 ++++------
 arch/x86/kernel/process_64.c     |    2 +-
 arch/x86/kernel/traps.c          |   35 +------
 6 files changed, 78 insertions(+), 207 deletions(-)


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 01/11] x86: Use correct type for %cr4
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:24   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 02/11] x86: Merge fpu_init() Brian Gerst
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

%cr4 is 64-bit in 64-bit mode (although the upper 32-bits are currently reserved).
Use unsigned long for the temporary variable to get the right size.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/processor.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 325b7bd..396b80f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -602,7 +602,7 @@ extern unsigned long		mmu_cr4_features;
 
 static inline void set_in_cr4(unsigned long mask)
 {
-	unsigned cr4;
+	unsigned long cr4;
 
 	mmu_cr4_features |= mask;
 	cr4 = read_cr4();
@@ -612,7 +612,7 @@ static inline void set_in_cr4(unsigned long mask)
 
 static inline void clear_in_cr4(unsigned long mask)
 {
-	unsigned cr4;
+	unsigned long cr4;
 
 	mmu_cr4_features &= ~mask;
 	cr4 = read_cr4();
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 02/11] x86: Merge fpu_init()
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
  2010-08-28 16:04 ` [PATCH 01/11] x86: Use correct type for %cr4 Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:29   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 03/11] x86: Merge tolerant_fwait() Brian Gerst
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

Make fpu_init() handle 32-bit setup.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/kernel/cpu/common.c |    7 -------
 arch/x86/kernel/i387.c       |   27 ++++++++++++---------------
 arch/x86/kernel/traps.c      |   12 ------------
 3 files changed, 12 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 490dac6..f9e23e8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1264,13 +1264,6 @@ void __cpuinit cpu_init(void)
 	clear_all_debug_regs();
 	dbg_restore_debug_regs();
 
-	/*
-	 * Force FPU initialization:
-	 */
-	current_thread_info()->status = 0;
-	clear_used_math();
-	mxcsr_feature_mask_init();
-
 	fpu_init();
 	xsave_init();
 }
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index a46cb35..c795675 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -80,7 +80,6 @@ static void __cpuinit init_thread_xstate(void)
 #endif
 }
 
-#ifdef CONFIG_X86_64
 /*
  * Called at bootup to set up the initial FPU state that is later cloned
  * into all processes.
@@ -88,12 +87,20 @@ static void __cpuinit init_thread_xstate(void)
 
 void __cpuinit fpu_init(void)
 {
-	unsigned long oldcr0 = read_cr0();
+	unsigned long cr0;
+	unsigned long cr4_mask = 0;
 
-	set_in_cr4(X86_CR4_OSFXSR);
-	set_in_cr4(X86_CR4_OSXMMEXCPT);
+	if (cpu_has_fxsr)
+		cr4_mask |= X86_CR4_OSFXSR;
+	if (cpu_has_xmm)
+		cr4_mask |= X86_CR4_OSXMMEXCPT;
+	set_in_cr4(cr4_mask);
 
-	write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
+	cr0 = read_cr0();
+	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
+	if (!HAVE_HWFP)
+		cr0 |= X86_CR0_EM;
+	write_cr0(cr0);
 
 	if (!smp_processor_id())
 		init_thread_xstate();
@@ -104,16 +111,6 @@ void __cpuinit fpu_init(void)
 	clear_used_math();
 }
 
-#else	/* CONFIG_X86_64 */
-
-void __cpuinit fpu_init(void)
-{
-	if (!smp_processor_id())
-		init_thread_xstate();
-}
-
-#endif	/* CONFIG_X86_32 */
-
 void fpu_finit(struct fpu *fpu)
 {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 60788de..d0029eb 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -881,18 +881,6 @@ void __init trap_init(void)
 #endif
 
 #ifdef CONFIG_X86_32
-	if (cpu_has_fxsr) {
-		printk(KERN_INFO "Enabling fast FPU save and restore... ");
-		set_in_cr4(X86_CR4_OSFXSR);
-		printk("done.\n");
-	}
-	if (cpu_has_xmm) {
-		printk(KERN_INFO
-			"Enabling unmasked SIMD FPU exception support... ");
-		set_in_cr4(X86_CR4_OSXMMEXCPT);
-		printk("done.\n");
-	}
-
 	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 03/11] x86: Merge tolerant_fwait()
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
  2010-08-28 16:04 ` [PATCH 01/11] x86: Use correct type for %cr4 Brian Gerst
  2010-08-28 16:04 ` [PATCH 02/11] x86: Merge fpu_init() Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:32   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 04/11] x86: Merge __save_init_fpu() Brian Gerst
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

Now that 32-bit can handle exceptions from the kernel, switch to using
the 64-bit version of tolerant_fwait() without fnclex.  In the unlikely
event an exception is triggered, it is simply ignored.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/i387.h |   19 ++++---------------
 1 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index a73a8d5..5d8f9a7 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -77,15 +77,6 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_X86_64
-
-/* Ignore delayed exceptions from user space */
-static inline void tolerant_fwait(void)
-{
-	asm volatile("1: fwait\n"
-		     "2:\n"
-		     _ASM_EXTABLE(1b, 2b));
-}
-
 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
 	int err;
@@ -220,11 +211,6 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
-static inline void tolerant_fwait(void)
-{
-	asm volatile("fnclex ; fwait");
-}
-
 /* perform fxrstor iff the processor has extended states, otherwise frstor */
 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
@@ -344,7 +330,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
 static inline void __clear_fpu(struct task_struct *tsk)
 {
 	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		tolerant_fwait();
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
 		task_thread_info(tsk)->status &= ~TS_USEDFPU;
 		stts();
 	}
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 04/11] x86: Merge __save_init_fpu()
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
                   ` (2 preceding siblings ...)
  2010-08-28 16:04 ` [PATCH 03/11] x86: Merge tolerant_fwait() Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:33   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 05/11] x86-64: Disable preemption when using TS_USEDFPU Brian Gerst
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

__save_init_fpu() is identical for 32-bit and 64-bit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/i387.h |   11 ++---------
 1 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 5d8f9a7..88065e3 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -197,12 +197,6 @@ static inline void fpu_save_init(struct fpu *fpu)
 	fpu_clear(fpu);
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
-{
-	fpu_save_init(&tsk->thread.fpu);
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
-}
-
 #else  /* CONFIG_X86_32 */
 
 #ifdef CONFIG_MATH_EMULATION
@@ -285,15 +279,14 @@ end:
 	;
 }
 
+#endif	/* CONFIG_X86_64 */
+
 static inline void __save_init_fpu(struct task_struct *tsk)
 {
 	fpu_save_init(&tsk->thread.fpu);
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
-
-#endif	/* CONFIG_X86_64 */
-
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
 {
 	return fxrstor_checking(&fpu->state->fxsave);
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 05/11] x86-64: Disable preemption when using TS_USEDFPU
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
                   ` (3 preceding siblings ...)
  2010-08-28 16:04 ` [PATCH 04/11] x86: Merge __save_init_fpu() Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:38   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr() Brian Gerst
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

Consolidates code and fixes the below race for 64-bit.

commit 9fa2f37bfeb798728241cc4a19578ce6e4258f25
Author: torvalds <torvalds>
Date:   Tue Sep 2 07:37:25 2003 +0000

    Be a lot more careful about TS_USEDFPU and preemption

    We had some races where we testecd (or set) TS_USEDFPU together
    with sequences that depended on the setting (like clearing or
    setting the TS flag in %cr0) and we could be preempted in between,
    which screws up the FPU state, since preemption will itself change
    USEDFPU and the TS flag.

    This makes it a lot more explicit: the "internal" low-level FPU
    functions ("__xxxx_fpu()") all require preemption to be disabled,
    and the exported "real" functions will make sure that is the case.

    One case - in __switch_to() - was switched to the non-preempt-safe
    internal version, since the scheduler itself has already disabled
    preemption.

    BKrev: 3f5448b5WRiQuyzAlbajs3qoQjSobw

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/i387.h  |   15 ---------------
 arch/x86/kernel/process_64.c |    2 +-
 2 files changed, 1 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 88065e3..8b40a83 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -387,19 +387,6 @@ static inline void irq_ts_restore(int TS_state)
 		stts();
 }
 
-#ifdef CONFIG_X86_64
-
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-	__save_init_fpu(tsk);
-	stts();
-}
-
-#define unlazy_fpu	__unlazy_fpu
-#define clear_fpu	__clear_fpu
-
-#else  /* CONFIG_X86_32 */
-
 /*
  * These disable preemption on their own and are safe
  */
@@ -425,8 +412,6 @@ static inline void clear_fpu(struct task_struct *tsk)
 	preempt_enable();
 }
 
-#endif	/* CONFIG_X86_64 */
-
 /*
  * i387 state interaction
  */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3d9ea53..b3d7a3a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	load_TLS(next, cpu);
 
 	/* Must be after DS reload */
-	unlazy_fpu(prev_p);
+	__unlazy_fpu(prev_p);
 
 	/* Make sure cpu is ready for new context */
 	if (preload_fpu)
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr()
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
                   ` (4 preceding siblings ...)
  2010-08-28 16:04 ` [PATCH 05/11] x86-64: Disable preemption when using TS_USEDFPU Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:41   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor Brian Gerst
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

While %ds still contains the userspace selector, %cs is KERNEL_CS
at this point.  Always get %cs from pt_regs.

It actually is possible to get the correct segments for compat tasks,
but that involves using the [f]xsave instruction without a REX.W prefix.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/kernel/i387.c |   18 ++++++++----------
 1 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index c795675..b1a732d 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -383,19 +383,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 #ifdef CONFIG_X86_64
 	env->fip = fxsave->rip;
 	env->foo = fxsave->rdp;
+	/*
+	 * should be actually ds/cs at fpu exception time, but
+	 * that information is not available in 64bit mode.
+	 */
+	env->fcs = task_pt_regs(tsk)->cs;
 	if (tsk == current) {
-		/*
-		 * should be actually ds/cs at fpu exception time, but
-		 * that information is not available in 64bit mode.
-		 */
-		asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos));
-		asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs));
+		savesegment(ds, env->fos);
 	} else {
-		struct pt_regs *regs = task_pt_regs(tsk);
-
-		env->fos = 0xffff0000 | tsk->thread.ds;
-		env->fcs = regs->cs;
+		env->fos = tsk->thread.ds;
 	}
+	env->fos |= 0xffff0000;
 #else
 	env->fip = fxsave->fip;
 	env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
                   ` (5 preceding siblings ...)
  2010-08-28 16:04 ` [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr() Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:45   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 08/11] x86-32: Remove math_emulate stub Brian Gerst
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

Use the "R" constraint (legacy register) instead of listing all the
possible registers.  Clean up the comments as well.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/i387.h |   44 ++++++++++++++++--------------------------
 1 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 8b40a83..768fcb2 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -81,6 +81,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
 	int err;
 
+	/* See comment in fxsave() below. */
 	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
 		     "2:\n"
 		     ".section .fixup,\"ax\"\n"
@@ -89,11 +90,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 		     ".previous\n"
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err)
-#if 0 /* See comment in fxsave() below. */
-		     : [fx] "r" (fx), "m" (*fx), "0" (0));
-#else
-		     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
-#endif
+		     : [fx] "R" (fx), "m" (*fx), "0" (0));
 	return err;
 }
 
@@ -140,6 +137,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 	if (unlikely(err))
 		return -EFAULT;
 
+	/* See comment in fxsave() below. */
 	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
 		     "2:\n"
 		     ".section .fixup,\"ax\"\n"
@@ -148,11 +146,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 		     ".previous\n"
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in fxsave() below. */
-		     : [fx] "r" (fx), "0" (0));
-#else
-		     : [fx] "cdaSDb" (fx), "0" (0));
-#endif
+		     : [fx] "R" (fx), "0" (0));
 	if (unlikely(err) &&
 	    __clear_user(fx, sizeof(struct i387_fxsave_struct)))
 		err = -EFAULT;
@@ -165,26 +159,22 @@ static inline void fpu_fxsave(struct fpu *fpu)
 	/* Using "rex64; fxsave %0" is broken because, if the memory operand
 	   uses any extended registers for addressing, a second REX prefix
 	   will be generated (to the assembler, rex64 followed by semicolon
-	   is a separate instruction), and hence the 64-bitness is lost. */
-#if 0
-	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
-	   starting with gas 2.16. */
-	__asm__ __volatile__("fxsaveq %0"
-			     : "=m" (fpu->state->fxsave));
-#elif 0
-	/* Using, as a workaround, the properly prefixed form below isn't
+	   is a separate instruction), and hence the 64-bitness is lost.
+	   Using "fxsaveq %0" would be the ideal choice, but is only supported
+	   starting with gas 2.16.
+	asm volatile("fxsaveq %0"
+		     : "=m" (fpu->state->fxsave));
+	   Using, as a workaround, the properly prefixed form below isn't
 	   accepted by any binutils version so far released, complaining that
 	   the same type of prefix is used twice if an extended register is
-	   needed for addressing (fix submitted to mainline 2005-11-21). */
-	__asm__ __volatile__("rex64/fxsave %0"
-			     : "=m" (fpu->state->fxsave));
-#else
-	/* This, however, we can work around by forcing the compiler to select
+	   needed for addressing (fix submitted to mainline 2005-11-21).
+	asm volatile("rex64/fxsave %0"
+		     : "=m" (fpu->state->fxsave));
+	   This, however, we can work around by forcing the compiler to select
 	   an addressing mode that doesn't require extended registers. */
-	__asm__ __volatile__("rex64/fxsave (%1)"
-			     : "=m" (fpu->state->fxsave)
-			     : "cdaSDb" (&fpu->state->fxsave));
-#endif
+	asm volatile("rex64/fxsave (%[fx])"
+		     : "=m" (fpu->state->fxsave)
+		     : [fx] "R" (&fpu->state->fxsave));
 }
 
 static inline void fpu_save_init(struct fpu *fpu)
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 08/11] x86-32: Remove math_emulate stub
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
                   ` (6 preceding siblings ...)
  2010-08-28 16:04 ` [PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:47   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 09/11] x86: Merge fpu_save_init() Brian Gerst
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

check_fpu() in bugs.c halts boot if no FPU is found and math emulation
isn't enabled.  Therefore this stub will never be used.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/kernel/traps.c |   23 ++++++-----------------
 1 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d0029eb..d439685 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -776,21 +776,10 @@ asmlinkage void math_state_restore(void)
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
-#ifndef CONFIG_MATH_EMULATION
-void math_emulate(struct math_emu_info *info)
-{
-	printk(KERN_EMERG
-		"math-emulation not enabled and no coprocessor found.\n");
-	printk(KERN_EMERG "killing %s.\n", current->comm);
-	force_sig(SIGFPE, current);
-	schedule();
-}
-#endif /* CONFIG_MATH_EMULATION */
-
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
 
@@ -798,12 +787,12 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 
 		info.regs = regs;
 		math_emulate(&info);
-	} else {
-		math_state_restore(); /* interrupts still off */
-		conditional_sti(regs);
+		return;
 	}
-#else
-	math_state_restore();
+#endif
+	math_state_restore(); /* interrupts still off */
+#ifdef CONFIG_X86_32
+	conditional_sti(regs);
 #endif
 }
 
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 09/11] x86: Merge fpu_save_init()
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
                   ` (7 preceding siblings ...)
  2010-08-28 16:04 ` [PATCH 08/11] x86-32: Remove math_emulate stub Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 18:54   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code Brian Gerst
  2010-08-28 16:04 ` [PATCH 11/11] x86: Remove PSHUFB_XMM5_* macros Brian Gerst
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

Merge 32-bit and 64-bit fpu_save_init().

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/i387.h |   88 +++++++++++--------------------------------
 1 files changed, 22 insertions(+), 66 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 768fcb2..58cb6f0 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -94,36 +94,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 	return err;
 }
 
-/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
-   is pending. Clear the x87 state here by setting it to fixed
-   values. The kernel data segment can be sometimes 0 and sometimes
-   new user value. Both should be ok.
-   Use the PDA as safe address because it should be already in L1. */
-static inline void fpu_clear(struct fpu *fpu)
-{
-	struct xsave_struct *xstate = &fpu->state->xsave;
-	struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-
-	/*
-	 * xsave header may indicate the init state of the FP.
-	 */
-	if (use_xsave() &&
-	    !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
-		return;
-
-	if (unlikely(fx->swd & X87_FSW_ES))
-		asm volatile("fnclex");
-	alternative_input(ASM_NOP8 ASM_NOP2,
-			  "    emms\n"		/* clear stack tags */
-			  "    fildl %%gs:0",	/* load to clear state */
-			  X86_FEATURE_FXSAVE_LEAK);
-}
-
-static inline void clear_fpu_state(struct task_struct *tsk)
-{
-	fpu_clear(&tsk->thread.fpu);
-}
-
 static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 {
 	int err;
@@ -177,16 +147,6 @@ static inline void fpu_fxsave(struct fpu *fpu)
 		     : [fx] "R" (&fpu->state->fxsave));
 }
 
-static inline void fpu_save_init(struct fpu *fpu)
-{
-	if (use_xsave())
-		fpu_xsave(fpu);
-	else
-		fpu_fxsave(fpu);
-
-	fpu_clear(fpu);
-}
-
 #else  /* CONFIG_X86_32 */
 
 #ifdef CONFIG_MATH_EMULATION
@@ -211,6 +171,19 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 	return 0;
 }
 
+static inline void fpu_fxsave(struct fpu *fpu)
+{
+	/* Use more nops than strictly needed in case the compiler
+	   varies code */
+	alternative_input(
+		"fnsave %[fx] ;fwait;" GENERIC_NOP2,
+		"fxsave %[fx]\n",
+		X86_FEATURE_FXSR,
+		[fx] "m" (fpu->state->fxsave) : "memory");
+}
+
+#endif	/* CONFIG_X86_64 */
+
 /* We need a safe address that is cheap to find and that is already
    in L1 during context switch. The best choices are unfortunately
    different for UP and SMP */
@@ -225,52 +198,35 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
  */
 static inline void fpu_save_init(struct fpu *fpu)
 {
+	struct i387_fxsave_struct *fx = &fpu->state->fxsave;
+
 	if (use_xsave()) {
 		struct xsave_struct *xstate = &fpu->state->xsave;
-		struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-
 		fpu_xsave(fpu);
-
 		/*
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
-			goto end;
-
+			return;
 		if (unlikely(fx->swd & X87_FSW_ES))
 			asm volatile("fnclex");
-
-		/*
-		 * we can do a simple return here or be paranoid :)
-		 */
-		goto clear_state;
+	} else {
+		fpu_fxsave(fpu);
+		if (cpu_has_fxsr && unlikely(fx->swd & X87_FSW_ES))
+			asm volatile("fnclex");
 	}
 
-	/* Use more nops than strictly needed in case the compiler
-	   varies code */
-	alternative_input(
-		"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
-		"fxsave %[fx]\n"
-		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
-		X86_FEATURE_FXSR,
-		[fx] "m" (fpu->state->fxsave),
-		[fsw] "m" (fpu->state->fxsave.swd) : "memory");
-clear_state:
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
 	   values. safe_address is a random variable that should be in L1 */
 	alternative_input(
-		GENERIC_NOP8 GENERIC_NOP2,
+		ASM_NOP8 ASM_NOP2,
 		"emms\n\t"	  	/* clear stack tags */
-		"fildl %[addr]", 	/* set F?P to defined value */
+		"fildl %P[addr]", 	/* set F?P to defined value */
 		X86_FEATURE_FXSAVE_LEAK,
 		[addr] "m" (safe_address));
-end:
-	;
 }
 
-#endif	/* CONFIG_X86_64 */
-
 static inline void __save_init_fpu(struct task_struct *tsk)
 {
 	fpu_save_init(&tsk->thread.fpu);
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code.
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
                   ` (8 preceding siblings ...)
  2010-08-28 16:04 ` [PATCH 09/11] x86: Merge fpu_save_init() Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 19:00   ` Pekka Enberg
  2010-08-28 16:04 ` [PATCH 11/11] x86: Remove PSHUFB_XMM5_* macros Brian Gerst
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

Remove ifdefs for code that the compiler can optimize away on 64-bit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/i387.h |   12 ++++++------
 arch/x86/kernel/i387.c      |    4 ----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 58cb6f0..3b4675d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf);
 extern int restore_i387_xstate_ia32(void __user *buf);
 #endif
 
+#ifdef CONFIG_MATH_EMULATION
+extern void finit_soft_fpu(struct i387_soft_struct *soft);
+#else
+static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
+#endif
+
 #define X87_FSW_ES (1 << 7)	/* Exception Summary */
 
 static __always_inline __pure bool use_xsaveopt(void)
@@ -149,12 +155,6 @@ static inline void fpu_fxsave(struct fpu *fpu)
 
 #else  /* CONFIG_X86_32 */
 
-#ifdef CONFIG_MATH_EMULATION
-extern void finit_soft_fpu(struct i387_soft_struct *soft);
-#else
-static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
-#endif
-
 /* perform fxrstor iff the processor has extended states, otherwise frstor */
 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index b1a732d..e21c138 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -74,10 +74,8 @@ static void __cpuinit init_thread_xstate(void)
 
 	if (cpu_has_fxsr)
 		xstate_size = sizeof(struct i387_fxsave_struct);
-#ifdef CONFIG_X86_32
 	else
 		xstate_size = sizeof(struct i387_fsave_struct);
-#endif
 }
 
 /*
@@ -113,12 +111,10 @@ void __cpuinit fpu_init(void)
 
 void fpu_finit(struct fpu *fpu)
 {
-#ifdef CONFIG_X86_32
 	if (!HAVE_HWFP) {
 		finit_soft_fpu(&fpu->state->soft);
 		return;
 	}
-#endif
 
 	if (cpu_has_fxsr) {
 		struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 11/11] x86: Remove PSHUFB_XMM5_* macros
  2010-08-28 16:04 x86: FPU cleanups Brian Gerst
                   ` (9 preceding siblings ...)
  2010-08-28 16:04 ` [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code Brian Gerst
@ 2010-08-28 16:04 ` Brian Gerst
  2010-08-29 19:02   ` Pekka Enberg
  10 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-28 16:04 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-kernel

The PSHUFB_XMM5_* macros are no longer used.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/i387.h |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 3b4675d..eeeca88 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -421,7 +421,4 @@ extern void fpu_finit(struct fpu *fpu);
 
 #endif /* __ASSEMBLY__ */
 
-#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
-#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
-
 #endif /* _ASM_X86_I387_H */
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH 01/11] x86: Use correct type for %cr4
  2010-08-28 16:04 ` [PATCH 01/11] x86: Use correct type for %cr4 Brian Gerst
@ 2010-08-29 18:24   ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:24 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> %cr4 is 64-bit in 64-bit mode (although the upper 32-bits are currently reserved).
> Use unsigned long for the temporary variable to get the right size.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

Acked-by: Pekka Enberg <penberg@kernel.org>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 02/11] x86: Merge fpu_init()
  2010-08-28 16:04 ` [PATCH 02/11] x86: Merge fpu_init() Brian Gerst
@ 2010-08-29 18:29   ` Pekka Enberg
  2010-08-30  0:44     ` Brian Gerst
  0 siblings, 1 reply; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:29 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> @@ -80,7 +80,6 @@ static void __cpuinit init_thread_xstate(void)
>  #endif
>  }
>
> -#ifdef CONFIG_X86_64
>  /*
>  * Called at bootup to set up the initial FPU state that is later cloned
>  * into all processes.
> @@ -88,12 +87,20 @@ static void __cpuinit init_thread_xstate(void)
>
>  void __cpuinit fpu_init(void)
>  {
> -       unsigned long oldcr0 = read_cr0();
> +       unsigned long cr0;
> +       unsigned long cr4_mask = 0;
>
> -       set_in_cr4(X86_CR4_OSFXSR);
> -       set_in_cr4(X86_CR4_OSXMMEXCPT);
> +       if (cpu_has_fxsr)
> +               cr4_mask |= X86_CR4_OSFXSR;
> +       if (cpu_has_xmm)
> +               cr4_mask |= X86_CR4_OSXMMEXCPT;
> +       set_in_cr4(cr4_mask);

Is calling set_in_cr4() unconditionally safe for 32-bit CPUs that
don't have cr4? AFAICT, no, because it uses read_cr4().

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 03/11] x86: Merge tolerant_fwait()
  2010-08-28 16:04 ` [PATCH 03/11] x86: Merge tolerant_fwait() Brian Gerst
@ 2010-08-29 18:32   ` Pekka Enberg
  2010-08-30  0:35     ` Brian Gerst
  0 siblings, 1 reply; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:32 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> Now that 32-bit can handle exceptions from the kernel, switch to using
> the 64-bit version of tolerant_fwait() without fnclex.  In the unlikely
> event an exception is triggered, it is simply ignored.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

Why is 32-bit able to handle exceptions? Is that part of this series
or is it some existing commit in tip or linus?

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 04/11] x86: Merge __save_init_fpu()
  2010-08-28 16:04 ` [PATCH 04/11] x86: Merge __save_init_fpu() Brian Gerst
@ 2010-08-29 18:33   ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:33 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> __save_init_fpu() is identical for 32-bit and 64-bit.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

Acked-by: Pekka Enberg <penberg@kernel.org>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 05/11] x86-64: Disable preemption when using TS_USEDFPU
  2010-08-28 16:04 ` [PATCH 05/11] x86-64: Disable preemption when using TS_USEDFPU Brian Gerst
@ 2010-08-29 18:38   ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:38 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> Consolidates code and fixes the below race for 64-bit.
>
> commit 9fa2f37bfeb798728241cc4a19578ce6e4258f25
> Author: torvalds <torvalds>
> Date:   Tue Sep 2 07:37:25 2003 +0000
>
>    Be a lot more careful about TS_USEDFPU and preemption
>
>    We had some races where we testecd (or set) TS_USEDFPU together
>    with sequences that depended on the setting (like clearing or
>    setting the TS flag in %cr0) and we could be preempted in between,
>    which screws up the FPU state, since preemption will itself change
>    USEDFPU and the TS flag.
>
>    This makes it a lot more explicit: the "internal" low-level FPU
>    functions ("__xxxx_fpu()") all require preemption to be disabled,
>    and the exported "real" functions will make sure that is the case.
>
>    One case - in __switch_to() - was switched to the non-preempt-safe
>    internal version, since the scheduler itself has already disabled
>    preemption.
>
>    BKrev: 3f5448b5WRiQuyzAlbajs3qoQjSobw
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

Acked-by: Pekka Enberg <penberg@kernel.org>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr()
  2010-08-28 16:04 ` [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr() Brian Gerst
@ 2010-08-29 18:41   ` Pekka Enberg
  2010-08-30  0:25     ` Brian Gerst
  0 siblings, 1 reply; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:41 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> While %ds still contains the userspace selector, %cs is KERNEL_CS
> at this point.  Always get %cs from pt_regs.
>
> It actually is possible to get the correct segments for compat tasks,
> but that involves using the [f]xsave instruction without a REX.W prefix.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

It might be just me but the above description doesn't explain
anything. What's the problem here? What is this fixing?

> ---
>  arch/x86/kernel/i387.c |   18 ++++++++----------
>  1 files changed, 8 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
> index c795675..b1a732d 100644
> --- a/arch/x86/kernel/i387.c
> +++ b/arch/x86/kernel/i387.c
> @@ -383,19 +383,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
>  #ifdef CONFIG_X86_64
>        env->fip = fxsave->rip;
>        env->foo = fxsave->rdp;
> +       /*
> +        * should be actually ds/cs at fpu exception time, but
> +        * that information is not available in 64bit mode.
> +        */
> +       env->fcs = task_pt_regs(tsk)->cs;
>        if (tsk == current) {
> -               /*
> -                * should be actually ds/cs at fpu exception time, but
> -                * that information is not available in 64bit mode.
> -                */
> -               asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos));
> -               asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs));
> +               savesegment(ds, env->fos);
>        } else {
> -               struct pt_regs *regs = task_pt_regs(tsk);
> -
> -               env->fos = 0xffff0000 | tsk->thread.ds;
> -               env->fcs = regs->cs;
> +               env->fos = tsk->thread.ds;
>        }
> +       env->fos |= 0xffff0000;
>  #else
>        env->fip = fxsave->fip;
>        env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
> --
> 1.7.2.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor
  2010-08-28 16:04 ` [PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor Brian Gerst
@ 2010-08-29 18:45   ` Pekka Enberg
  2010-08-29 23:44     ` Brian Gerst
  0 siblings, 1 reply; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:45 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> Use the "R" constraint (legacy register) instead of listing all the
> possible registers.  Clean up the comments as well.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> ---
>  arch/x86/include/asm/i387.h |   44 ++++++++++++++++--------------------------
>  1 files changed, 17 insertions(+), 27 deletions(-)
>
> diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
> index 8b40a83..768fcb2 100644
> --- a/arch/x86/include/asm/i387.h
> +++ b/arch/x86/include/asm/i387.h
> @@ -81,6 +81,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
>  {
>        int err;
>
> +       /* See comment in fxsave() below. */
>        asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
>                     "2:\n"
>                     ".section .fixup,\"ax\"\n"
> @@ -89,11 +90,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
>                     ".previous\n"
>                     _ASM_EXTABLE(1b, 3b)
>                     : [err] "=r" (err)
> -#if 0 /* See comment in fxsave() below. */
> -                    : [fx] "r" (fx), "m" (*fx), "0" (0));
> -#else
> -                    : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
> -#endif
> +                    : [fx] "R" (fx), "m" (*fx), "0" (0));

Please correct me if I'm wrong but "legacy registers" also include bp
and sp that are not part of the original constraints:

http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html

So why is it OK to use "R" here?

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 08/11] x86-32: Remove math_emulate stub
  2010-08-28 16:04 ` [PATCH 08/11] x86-32: Remove math_emulate stub Brian Gerst
@ 2010-08-29 18:47   ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:47 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> check_fpu() in bugs.c halts boot if no FPU is found and math emulation
> isn't enabled.  Therefore this stub will never be used.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

Acked-by: Pekka Enberg <penberg@kernel.org>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 09/11] x86: Merge fpu_save_init()
  2010-08-28 16:04 ` [PATCH 09/11] x86: Merge fpu_save_init() Brian Gerst
@ 2010-08-29 18:54   ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 18:54 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> Merge 32-bit and 64-bit fpu_save_init().
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

This patch is pretty hard to review. Maybe you could split this into
two patches where the first preparational patch would introduce some
helpers such as fpu_fxsave() on 32-bit. It should be easier to review
the fpu_save_init() merging in a second patch that way.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code.
  2010-08-28 16:04 ` [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code Brian Gerst
@ 2010-08-29 19:00   ` Pekka Enberg
  2010-08-29 23:38     ` Brian Gerst
  0 siblings, 1 reply; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 19:00 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> Remove ifdefs for code that the compiler can optimize away on 64-bit.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

> @@ -74,10 +74,8 @@ static void __cpuinit init_thread_xstate(void)
>
>        if (cpu_has_fxsr)
>                xstate_size = sizeof(struct i387_fxsave_struct);
> -#ifdef CONFIG_X86_32
>        else
>                xstate_size = sizeof(struct i387_fsave_struct);
> -#endif
>  }

I guess this is OK but keep in mind that cpu_has_fsxr is _not_
optimized by the compiler on 64-bit so the change probably increases
kernel text by few bytes.

Acked-by: Pekka Enberg <penberg@kernel.org>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 11/11] x86: Remove PSHUFB_XMM5_* macros
  2010-08-28 16:04 ` [PATCH 11/11] x86: Remove PSHUFB_XMM5_* macros Brian Gerst
@ 2010-08-29 19:02   ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-29 19:02 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
> The PSHUFB_XMM5_* macros are no longer used.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

Acked-by: Pekka Enberg <penberg@kernel.org>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code.
  2010-08-29 19:00   ` Pekka Enberg
@ 2010-08-29 23:38     ` Brian Gerst
  2010-08-30  5:44       ` Pekka Enberg
  0 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-29 23:38 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: hpa, x86, linux-kernel

On Sun, Aug 29, 2010 at 3:00 PM, Pekka Enberg <penberg@kernel.org> wrote:
> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> Remove ifdefs for code that the compiler can optimize away on 64-bit.
>>
>> Signed-off-by: Brian Gerst <brgerst@gmail.com>
>
>> @@ -74,10 +74,8 @@ static void __cpuinit init_thread_xstate(void)
>>
>>        if (cpu_has_fxsr)
>>                xstate_size = sizeof(struct i387_fxsave_struct);
>> -#ifdef CONFIG_X86_32
>>        else
>>                xstate_size = sizeof(struct i387_fsave_struct);
>> -#endif
>>  }
>
> I guess this is OK but keep in mind that cpu_has_fsxr is _not_
> optimized by the compiler on 64-bit so the change probably increases
> kernel text by few bytes.
>
> Acked-by: Pekka Enberg <penberg@kernel.org>
>

FXSR is a required feature on 64-bit, therefore cpu_has_fxsr is always true.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor
  2010-08-29 18:45   ` Pekka Enberg
@ 2010-08-29 23:44     ` Brian Gerst
  2010-08-30  5:56       ` Pekka Enberg
  0 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-29 23:44 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: hpa, x86, linux-kernel

On Sun, Aug 29, 2010 at 2:45 PM, Pekka Enberg <penberg@kernel.org> wrote:
> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> Use the "R" constraint (legacy register) instead of listing all the
>> possible registers.  Clean up the comments as well.
>>
>> Signed-off-by: Brian Gerst <brgerst@gmail.com>
>> ---
>>  arch/x86/include/asm/i387.h |   44 ++++++++++++++++--------------------------
>>  1 files changed, 17 insertions(+), 27 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
>> index 8b40a83..768fcb2 100644
>> --- a/arch/x86/include/asm/i387.h
>> +++ b/arch/x86/include/asm/i387.h
>> @@ -81,6 +81,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
>>  {
>>        int err;
>>
>> +       /* See comment in fxsave() below. */
>>        asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
>>                     "2:\n"
>>                     ".section .fixup,\"ax\"\n"
>> @@ -89,11 +90,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
>>                     ".previous\n"
>>                     _ASM_EXTABLE(1b, 3b)
>>                     : [err] "=r" (err)
>> -#if 0 /* See comment in fxsave() below. */
>> -                    : [fx] "r" (fx), "m" (*fx), "0" (0));
>> -#else
>> -                    : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
>> -#endif
>> +                    : [fx] "R" (fx), "m" (*fx), "0" (0));
>
> Please correct me if I'm wrong but "legacy registers" also include bp
> and sp that are not part of the original constraints:
>
> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
>
> So why is it OK to use "R" here?
>

There is no constraint to explicitly request %bp (or %sp), but there
is no reason it could not be used.  The compiler would never choose
%sp for "R" for the same reason it wouldn't for "r": it's not
available as a general purpose register.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr()
  2010-08-29 18:41   ` Pekka Enberg
@ 2010-08-30  0:25     ` Brian Gerst
  2010-08-30  6:44       ` Pekka Enberg
  0 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-30  0:25 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: hpa, x86, linux-kernel

On Sun, Aug 29, 2010 at 2:41 PM, Pekka Enberg <penberg@kernel.org> wrote:
> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> While %ds still contains the userspace selector, %cs is KERNEL_CS
>> at this point.  Always get %cs from pt_regs.
>>
>> It actually is possible to get the correct segments for compat tasks,
>> but that involves using the [f]xsave instruction without a REX.W prefix.
>>
>> Signed-off-by: Brian Gerst <brgerst@gmail.com>
>
> It might be just me but the above description doesn't explain
> anything. What's the problem here? What is this fixing?

The %cs segment being reported to a compat task is flat out wrong.  It
is getting KERNEL_CS when it should be some userspace segment.  The
code segment may still be wrong, because the %cs in pt_regs may not
have been the segment where the instruction that flagged the exception
executed from.  That could be fixed by using fxsave without a REX.W
prefix when saving the state of compat tasks, which would save the
segment and 32-bit offset instead of the 64-bit offset for the code
and data pointers.  This is such a corner case that it probably isn't
worth putting much effort into fixing unless someone demonstrates a
real need for it.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 03/11] x86: Merge tolerant_fwait()
  2010-08-29 18:32   ` Pekka Enberg
@ 2010-08-30  0:35     ` Brian Gerst
  2010-08-30  5:47       ` Pekka Enberg
  0 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-30  0:35 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: hpa, x86, linux-kernel

On Sun, Aug 29, 2010 at 2:32 PM, Pekka Enberg <penberg@kernel.org> wrote:
> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> Now that 32-bit can handle exceptions from the kernel, switch to using
>> the 64-bit version of tolerant_fwait() without fnclex.  In the unlikely
>> event an exception is triggered, it is simply ignored.
>>
>> Signed-off-by: Brian Gerst <brgerst@gmail.com>
>
> Why is 32-bit able to handle exceptions? Is that part of this series
> or is it some existing commit in tip or linus?
>

Commit e2e75c915de045f0785387dc32f55e92fab0614c merged the 32-bit and
64-bit math exception handlers into one, which calls
fixup_exception().

--
Brian Gerst

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 02/11] x86: Merge fpu_init()
  2010-08-29 18:29   ` Pekka Enberg
@ 2010-08-30  0:44     ` Brian Gerst
  0 siblings, 0 replies; 35+ messages in thread
From: Brian Gerst @ 2010-08-30  0:44 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: hpa, x86, linux-kernel

On Sun, Aug 29, 2010 at 2:29 PM, Pekka Enberg <penberg@kernel.org> wrote:
> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
>> @@ -80,7 +80,6 @@ static void __cpuinit init_thread_xstate(void)
>>  #endif
>>  }
>>
>> -#ifdef CONFIG_X86_64
>>  /*
>>  * Called at bootup to set up the initial FPU state that is later cloned
>>  * into all processes.
>> @@ -88,12 +87,20 @@ static void __cpuinit init_thread_xstate(void)
>>
>>  void __cpuinit fpu_init(void)
>>  {
>> -       unsigned long oldcr0 = read_cr0();
>> +       unsigned long cr0;
>> +       unsigned long cr4_mask = 0;
>>
>> -       set_in_cr4(X86_CR4_OSFXSR);
>> -       set_in_cr4(X86_CR4_OSXMMEXCPT);
>> +       if (cpu_has_fxsr)
>> +               cr4_mask |= X86_CR4_OSFXSR;
>> +       if (cpu_has_xmm)
>> +               cr4_mask |= X86_CR4_OSXMMEXCPT;
>> +       set_in_cr4(cr4_mask);
>
> Is calling set_in_cr4() unconditionally safe for 32-bit CPUs that
> don't have cr4? AFAICT, no, because it uses read_cr4().
>

Good catch, will fix.
--
Brian Gerst

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code.
  2010-08-29 23:38     ` Brian Gerst
@ 2010-08-30  5:44       ` Pekka Enberg
  2010-08-30 11:21         ` Brian Gerst
  0 siblings, 1 reply; 35+ messages in thread
From: Pekka Enberg @ 2010-08-30  5:44 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

  Hi Brian,

On 8/30/10 2:38 AM, Brian Gerst wrote:
> On Sun, Aug 29, 2010 at 3:00 PM, Pekka Enberg<penberg@kernel.org>  wrote:
>> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst<brgerst@gmail.com>  wrote:
>>> Remove ifdefs for code that the compiler can optimize away on 64-bit.
>>>
>>> Signed-off-by: Brian Gerst<brgerst@gmail.com>
>>> @@ -74,10 +74,8 @@ static void __cpuinit init_thread_xstate(void)
>>>
>>>         if (cpu_has_fxsr)
>>>                 xstate_size = sizeof(struct i387_fxsave_struct);
>>> -#ifdef CONFIG_X86_32
>>>         else
>>>                 xstate_size = sizeof(struct i387_fsave_struct);
>>> -#endif
>>>   }
>> I guess this is OK but keep in mind that cpu_has_fsxr is _not_
>> optimized by the compiler on 64-bit so the change probably increases
>> kernel text by few bytes.
> FXSR is a required feature on 64-bit, therefore cpu_has_fxsr is always true.
Yes, I realize that but it will still read boot_cpu_data at runtime, no?

             Pekka

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 03/11] x86: Merge tolerant_fwait()
  2010-08-30  0:35     ` Brian Gerst
@ 2010-08-30  5:47       ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-30  5:47 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

  On 8/30/10 3:35 AM, Brian Gerst wrote:
> On Sun, Aug 29, 2010 at 2:32 PM, Pekka Enberg<penberg@kernel.org>  wrote:
>> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst<brgerst@gmail.com>  wrote:
>>> Now that 32-bit can handle exceptions from the kernel, switch to using
>>> the 64-bit version of tolerant_fwait() without fnclex.  In the unlikely
>>> event an exception is triggered, it is simply ignored.
>>>
>>> Signed-off-by: Brian Gerst<brgerst@gmail.com>
>> Why is 32-bit able to handle exceptions? Is that part of this series
>> or is it some existing commit in tip or linus?
>>
> Commit e2e75c915de045f0785387dc32f55e92fab0614c merged the 32-bit and
> 64-bit math exception handlers into one, which calls
> fixup_exception().
That could be in the changelog.

Acked-by: Pekka Enberg <penberg@kernel.org>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor
  2010-08-29 23:44     ` Brian Gerst
@ 2010-08-30  5:56       ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-30  5:56 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

  On 8/30/10 2:44 AM, Brian Gerst wrote:
> On Sun, Aug 29, 2010 at 2:45 PM, Pekka Enberg<penberg@kernel.org>  wrote:
>> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst<brgerst@gmail.com>  wrote:
>>> Use the "R" constraint (legacy register) instead of listing all the
>>> possible registers.  Clean up the comments as well.
>>>
>>> Signed-off-by: Brian Gerst<brgerst@gmail.com>
>>> ---
>>>   arch/x86/include/asm/i387.h |   44 ++++++++++++++++--------------------------
>>>   1 files changed, 17 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
>>> index 8b40a83..768fcb2 100644
>>> --- a/arch/x86/include/asm/i387.h
>>> +++ b/arch/x86/include/asm/i387.h
>>> @@ -81,6 +81,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
>>>   {
>>>         int err;
>>>
>>> +       /* See comment in fxsave() below. */
>>>         asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
>>>                      "2:\n"
>>>                      ".section .fixup,\"ax\"\n"
>>> @@ -89,11 +90,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
>>>                      ".previous\n"
>>>                      _ASM_EXTABLE(1b, 3b)
>>>                      : [err] "=r" (err)
>>> -#if 0 /* See comment in fxsave() below. */
>>> -                    : [fx] "r" (fx), "m" (*fx), "0" (0));
>>> -#else
>>> -                    : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
>>> -#endif
>>> +                    : [fx] "R" (fx), "m" (*fx), "0" (0));
>> Please correct me if I'm wrong but "legacy registers" also include bp
>> and sp that are not part of the original constraints:
>>
>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
>>
>> So why is it OK to use "R" here?
> There is no constraint to explicitly request %bp (or %sp), but there
> is no reason it could not be used.  The compiler would never choose
> %sp for "R" for the same reason it wouldn't for "r": it's not
> available as a general purpose register.
Yeah, if I try to force GCC to use bp or sp, it will complain that it 
runs out of legacy registers:

error: can't find a register in class `LEGACY_REGS' while reloading `asm'

which makes sense as it knows that sp and bp are used by the function 
that contains the inline asm.

Acked-by: Pekka Enberg <penberg@kernel.org>

             Pekka

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr()
  2010-08-30  0:25     ` Brian Gerst
@ 2010-08-30  6:44       ` Pekka Enberg
  2010-08-30 11:38         ` Brian Gerst
  0 siblings, 1 reply; 35+ messages in thread
From: Pekka Enberg @ 2010-08-30  6:44 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
>>> While %ds still contains the userspace selector, %cs is KERNEL_CS
>>> at this point.  Always get %cs from pt_regs.
>>>
>>> It actually is possible to get the correct segments for compat tasks,
>>> but that involves using the [f]xsave instruction without a REX.W prefix.
>>>
>>> Signed-off-by: Brian Gerst <brgerst@gmail.com>

On Sun, Aug 29, 2010 at 2:41 PM, Pekka Enberg <penberg@kernel.org> wrote:
>> It might be just me but the above description doesn't explain
>> anything. What's the problem here? What is this fixing?

On Mon, Aug 30, 2010 at 3:25 AM, Brian Gerst <brgerst@gmail.com> wrote:
> The %cs segment being reported to a compat task is flat out wrong.  It
> is getting KERNEL_CS when it should be some userspace segment.  The
> code segment may still be wrong, because the %cs in pt_regs may not
> have been the segment where the instruction that flagged the exception
> executed from.  That could be fixed by using fxsave without a REX.W
> prefix when saving the state of compat tasks, which would save the
> segment and 32-bit offset instead of the 64-bit offset for the code
> and data pointers.  This is such a corner case that it probably isn't
> worth putting much effort into fixing unless someone demonstrates a
> real need for it.

I sort of was able to deduce most of that from the original
description. However, I still don't quite understand what the problem
causes. Just a wrong cs reported to a signal handler or something
else?

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code.
  2010-08-30  5:44       ` Pekka Enberg
@ 2010-08-30 11:21         ` Brian Gerst
  2010-08-30 11:25           ` Pekka Enberg
  0 siblings, 1 reply; 35+ messages in thread
From: Brian Gerst @ 2010-08-30 11:21 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: hpa, x86, linux-kernel

On Mon, Aug 30, 2010 at 1:44 AM, Pekka Enberg <penberg@kernel.org> wrote:
>  Hi Brian,
>
> On 8/30/10 2:38 AM, Brian Gerst wrote:
>>
>> On Sun, Aug 29, 2010 at 3:00 PM, Pekka Enberg<penberg@kernel.org>  wrote:
>>>
>>> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst<brgerst@gmail.com>  wrote:
>>>>
>>>> Remove ifdefs for code that the compiler can optimize away on 64-bit.
>>>>
>>>> Signed-off-by: Brian Gerst<brgerst@gmail.com>
>>>> @@ -74,10 +74,8 @@ static void __cpuinit init_thread_xstate(void)
>>>>
>>>>        if (cpu_has_fxsr)
>>>>                xstate_size = sizeof(struct i387_fxsave_struct);
>>>> -#ifdef CONFIG_X86_32
>>>>        else
>>>>                xstate_size = sizeof(struct i387_fsave_struct);
>>>> -#endif
>>>>  }
>>>
>>> I guess this is OK but keep in mind that cpu_has_fsxr is _not_
>>> optimized by the compiler on 64-bit so the change probably increases
>>> kernel text by few bytes.
>>
>> FXSR is a required feature on 64-bit, therefore cpu_has_fxsr is always
>> true.
>
> Yes, I realize that but it will still read boot_cpu_data at runtime, no?

Look at cpu_has().  It checks REQUIRED_MASK* if the feature bit is a
constant, and returns true without testing the actual bit in
boot_cpu_data for required features.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code.
  2010-08-30 11:21         ` Brian Gerst
@ 2010-08-30 11:25           ` Pekka Enberg
  0 siblings, 0 replies; 35+ messages in thread
From: Pekka Enberg @ 2010-08-30 11:25 UTC (permalink / raw)
  To: Brian Gerst; +Cc: hpa, x86, linux-kernel

  On 8/30/10 2:21 PM, Brian Gerst wrote:
> FXSR is a required feature on 64-bit, therefore cpu_has_fxsr is always
>>> true.
>> Yes, I realize that but it will still read boot_cpu_data at runtime, no?
> Look at cpu_has().  It checks REQUIRED_MASK* if the feature bit is a
> constant, and returns true without testing the actual bit in
> boot_cpu_data for required features.
Right you are, thanks for the explanation!

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr()
  2010-08-30  6:44       ` Pekka Enberg
@ 2010-08-30 11:38         ` Brian Gerst
  0 siblings, 0 replies; 35+ messages in thread
From: Brian Gerst @ 2010-08-30 11:38 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: hpa, x86, linux-kernel

On Mon, Aug 30, 2010 at 2:44 AM, Pekka Enberg <penberg@kernel.org> wrote:
> On Sat, Aug 28, 2010 at 7:04 PM, Brian Gerst <brgerst@gmail.com> wrote:
>>>> While %ds still contains the userspace selector, %cs is KERNEL_CS
>>>> at this point.  Always get %cs from pt_regs.
>>>>
>>>> It actually is possible to get the correct segments for compat tasks,
>>>> but that involves using the [f]xsave instruction without a REX.W prefix.
>>>>
>>>> Signed-off-by: Brian Gerst <brgerst@gmail.com>
>
> On Sun, Aug 29, 2010 at 2:41 PM, Pekka Enberg <penberg@kernel.org> wrote:
>>> It might be just me but the above description doesn't explain
>>> anything. What's the problem here? What is this fixing?
>
> On Mon, Aug 30, 2010 at 3:25 AM, Brian Gerst <brgerst@gmail.com> wrote:
>> The %cs segment being reported to a compat task is flat out wrong.  It
>> is getting KERNEL_CS when it should be some userspace segment.  The
>> code segment may still be wrong, because the %cs in pt_regs may not
>> have been the segment where the instruction that flagged the exception
>> executed from.  That could be fixed by using fxsave without a REX.W
>> prefix when saving the state of compat tasks, which would save the
>> segment and 32-bit offset instead of the 64-bit offset for the code
>> and data pointers.  This is such a corner case that it probably isn't
>> worth putting much effort into fixing unless someone demonstrates a
>> real need for it.
>
> I sort of was able to deduce most of that from the original
> description. However, I still don't quite understand what the problem
> causes. Just a wrong cs reported to a signal handler or something
> else?
>

The wrong cs value is reported to userspace.  I don't know of any apps
that actually cares about it, except for possibly wine or dosemu.  In
any app that doesn't use alternate segments, the above fix will be
give the correct cs every time (since it never changes).

--
Brian Gerst

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2010-08-30 11:38 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-28 16:04 x86: FPU cleanups Brian Gerst
2010-08-28 16:04 ` [PATCH 01/11] x86: Use correct type for %cr4 Brian Gerst
2010-08-29 18:24   ` Pekka Enberg
2010-08-28 16:04 ` [PATCH 02/11] x86: Merge fpu_init() Brian Gerst
2010-08-29 18:29   ` Pekka Enberg
2010-08-30  0:44     ` Brian Gerst
2010-08-28 16:04 ` [PATCH 03/11] x86: Merge tolerant_fwait() Brian Gerst
2010-08-29 18:32   ` Pekka Enberg
2010-08-30  0:35     ` Brian Gerst
2010-08-30  5:47       ` Pekka Enberg
2010-08-28 16:04 ` [PATCH 04/11] x86: Merge __save_init_fpu() Brian Gerst
2010-08-29 18:33   ` Pekka Enberg
2010-08-28 16:04 ` [PATCH 05/11] x86-64: Disable preemption when using TS_USEDFPU Brian Gerst
2010-08-29 18:38   ` Pekka Enberg
2010-08-28 16:04 ` [PATCH 06/11] x86-64: Fix %cs value in convert_from_fxsr() Brian Gerst
2010-08-29 18:41   ` Pekka Enberg
2010-08-30  0:25     ` Brian Gerst
2010-08-30  6:44       ` Pekka Enberg
2010-08-30 11:38         ` Brian Gerst
2010-08-28 16:04 ` [PATCH 07/11] x86-64: Simplify constraints for fxsave/fxtstor Brian Gerst
2010-08-29 18:45   ` Pekka Enberg
2010-08-29 23:44     ` Brian Gerst
2010-08-30  5:56       ` Pekka Enberg
2010-08-28 16:04 ` [PATCH 08/11] x86-32: Remove math_emulate stub Brian Gerst
2010-08-29 18:47   ` Pekka Enberg
2010-08-28 16:04 ` [PATCH 09/11] x86: Merge fpu_save_init() Brian Gerst
2010-08-29 18:54   ` Pekka Enberg
2010-08-28 16:04 ` [PATCH 10/11] x86: Remove unnecessary ifdefs from i387 code Brian Gerst
2010-08-29 19:00   ` Pekka Enberg
2010-08-29 23:38     ` Brian Gerst
2010-08-30  5:44       ` Pekka Enberg
2010-08-30 11:21         ` Brian Gerst
2010-08-30 11:25           ` Pekka Enberg
2010-08-28 16:04 ` [PATCH 11/11] x86: Remove PSHUFB_XMM5_* macros Brian Gerst
2010-08-29 19:02   ` Pekka Enberg

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.