linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 1/2] x86, fpu: split FPU state from task struct - v3
@ 2008-03-03 23:02 Suresh Siddha
  2008-03-03 23:02 ` [patch 2/2] x86, fpu: lazy allocation of FPU area " Suresh Siddha
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Suresh Siddha @ 2008-03-03 23:02 UTC (permalink / raw)
  To: mingo, hpa, tglx, andi, hch; +Cc: linux-kernel, Suresh Siddha, Arjan van de Ven

[-- Attachment #1: x86-split-fp-from-task-struct.patch --]
[-- Type: text/plain, Size: 24277 bytes --]

Split the FPU save area from the task struct. This allows easy migration
of FPU context, and it's generally cleaner. It also allows the following
two optimizations:

1) only allocate the FPU state when the application actually uses the FPU,
i.e., in the first lazy FPU trap. This could save memory for non-FPU-using
apps. The next patch does this lazy allocation.

2) allocate the right size for the actual cpu rather than 512 bytes always.
Patches enabling xsave/xrstor support (coming shortly) will take advantage
of this.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
v3: used weak attribute instead of macros for architecture overrides.
v2: Removed the cosmetic macros and the need for x86 defining its own
task allocators using __HAVE_ARCH_TASK_STRUCT_ALLOCATOR. Other minor cleanups.
---

Index: linux-2.6-x86/arch/x86/kernel/process_64.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/process_64.c	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/process_64.c	2008-03-03 14:13:18.000000000 -0800
@@ -634,7 +634,7 @@
 
 	/* we're going to use this soon, after a few expensive things */
 	if (next_p->fpu_counter>5)
-		prefetch(&next->i387.fxsave);
+		prefetch(next->xstate);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
Index: linux-2.6-x86/arch/x86/kernel/traps_64.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/traps_64.c	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/traps_64.c	2008-03-03 14:13:18.000000000 -0800
@@ -1121,7 +1121,7 @@
 
 	if (!used_math())
 		init_fpu(me);
-	restore_fpu_checking(&me->thread.i387.fxsave);
+	restore_fpu_checking(&me->thread.xstate->fxsave);
 	task_thread_info(me)->status |= TS_USEDFPU;
 	me->fpu_counter++;
 }
@@ -1157,6 +1157,10 @@
 #endif
        
 	/*
+	 * initialize the per thread extended state:
+	 */
+        init_thread_xstate();
+	/*
 	 * Should be a barrier for any external CPU state.
 	 */
 	cpu_init();
Index: linux-2.6-x86/kernel/fork.c
===================================================================
--- linux-2.6-x86.orig/kernel/fork.c	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/kernel/fork.c	2008-03-03 14:13:39.000000000 -0800
@@ -132,6 +132,10 @@
 		free_task(tsk);
 }
 
+void __attribute__((weak)) arch_task_cache_init(void)
+{
+}
+
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -144,6 +148,9 @@
 			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
 #endif
 
+	/* do the arch specific task caches init */
+	arch_task_cache_init();
+
 	/*
 	 * The default maximum number of threads is set to a safe
 	 * value: the thread structures can take up at most half
@@ -163,6 +170,13 @@
 		init_task.signal->rlim[RLIMIT_NPROC];
 }
 
+int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
+					       struct task_struct *src)
+{
+	*dst = *src;
+	return 0;
+}
+
 static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
 	struct task_struct *tsk;
@@ -181,15 +195,15 @@
 		return NULL;
 	}
 
-	*tsk = *orig;
+ 	err = arch_dup_task_struct(tsk, orig);
+	if (err)
+		goto out;
+
 	tsk->stack = ti;
 
 	err = prop_local_init_single(&tsk->dirties);
-	if (err) {
-		free_thread_info(ti);
-		free_task_struct(tsk);
-		return NULL;
-	}
+	if (err)
+		goto out;
 
 	setup_thread_stack(tsk, orig);
 
@@ -205,6 +219,11 @@
 #endif
 	tsk->splice_pipe = NULL;
 	return tsk;
+
+out:
+	free_thread_info(ti);
+	free_task_struct(tsk);
+	return NULL;
 }
 
 #ifdef CONFIG_MMU
Index: linux-2.6-x86/arch/x86/kernel/i387.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/i387.c	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/i387.c	2008-03-03 14:13:18.000000000 -0800
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/regset.h>
+#include <linux/bootmem.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/math_emu.h>
@@ -40,16 +41,17 @@
 #endif
 
 static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+unsigned int xstate_size;
+static struct i387_fxsave_struct fx_scratch __cpuinitdata;
 
-void mxcsr_feature_mask_init(void)
+void __cpuinit mxcsr_feature_mask_init(void)
 {
 	unsigned long mask = 0;
 	clts();
 	if (cpu_has_fxsr) {
-		memset(&current->thread.i387.fxsave, 0,
-		       sizeof(struct i387_fxsave_struct));
-		asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
-		mask = current->thread.i387.fxsave.mxcsr_mask;
+		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
+		asm volatile("fxsave %0" : : "m" (fx_scratch));
+		mask = fx_scratch.mxcsr_mask;
 		if (mask == 0)
 			mask = 0x0000ffbf;
 	}
@@ -57,6 +59,17 @@
 	stts();
 }
 
+void __init init_thread_xstate(void)
+{
+	if (cpu_has_fxsr)
+		xstate_size = sizeof(struct i387_fxsave_struct);
+#ifdef CONFIG_X86_32
+	else
+		xstate_size = sizeof(struct i387_fsave_struct);
+#endif
+	init_task.thread.xstate = alloc_bootmem(xstate_size);
+}
+
 #ifdef CONFIG_X86_64
 /*
  * Called at bootup to set up the initial FPU state that is later cloned
@@ -65,10 +78,7 @@
 void __cpuinit fpu_init(void)
 {
 	unsigned long oldcr0 = read_cr0();
-	extern void __bad_fxsave_alignment(void);
 
-	if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
-		__bad_fxsave_alignment();
 	set_in_cr4(X86_CR4_OSFXSR);
 	set_in_cr4(X86_CR4_OSXMMEXCPT);
 
@@ -96,18 +106,19 @@
 	}
 
 	if (cpu_has_fxsr) {
-		memset(&tsk->thread.i387.fxsave, 0,
-		       sizeof(struct i387_fxsave_struct));
-		tsk->thread.i387.fxsave.cwd = 0x37f;
+		struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+		memset(fx, 0, xstate_size);
+		fx->cwd = 0x37f;
 		if (cpu_has_xmm)
-			tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT;
+			fx->mxcsr = MXCSR_DEFAULT;
 	} else {
-		memset(&tsk->thread.i387.fsave, 0,
-		       sizeof(struct i387_fsave_struct));
-		tsk->thread.i387.fsave.cwd = 0xffff037fu;
-		tsk->thread.i387.fsave.swd = 0xffff0000u;
-		tsk->thread.i387.fsave.twd = 0xffffffffu;
-		tsk->thread.i387.fsave.fos = 0xffff0000u;
+		struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
+		memset(fp, 0, xstate_size);
+		fp->cwd = 0xffff037fu;
+		fp->swd = 0xffff0000u;
+		fp->twd = 0xffffffffu;
+		fp->fos = 0xffff0000u;
 	}
 	/*
 	 * Only the device not available exception or ptrace can call init_fpu.
@@ -135,7 +146,7 @@
 	init_fpu(target);
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.i387.fxsave, 0, -1);
+				   &target->thread.xstate->fxsave, 0, -1);
 }
 
 int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -151,12 +162,12 @@
 	set_stopped_child_used_math(target);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.i387.fxsave, 0, -1);
+				 &target->thread.xstate->fxsave, 0, -1);
 
 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
-	target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+	target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 
 	return ret;
 }
@@ -235,7 +246,7 @@
 static void convert_from_fxsr(struct user_i387_ia32_struct *env,
 			      struct task_struct *tsk)
 {
-	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
 	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
 	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
@@ -274,7 +285,7 @@
 			    const struct user_i387_ia32_struct *env)
 
 {
-	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
 	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
 	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
@@ -311,7 +322,7 @@
 
 	if (!cpu_has_fxsr)
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-					   &target->thread.i387.fsave, 0, -1);
+					   &target->thread.xstate->fsave, 0, -1);
 
 	if (kbuf && pos == 0 && count == sizeof(env)) {
 		convert_from_fxsr(kbuf, target);
@@ -337,7 +348,7 @@
 
 	if (!cpu_has_fxsr)
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					  &target->thread.i387.fsave, 0, -1);
+					  &target->thread.xstate->fsave, 0, -1);
 
 	if (pos > 0 || count < sizeof(env))
 		convert_from_fxsr(&env, target);
@@ -356,11 +367,11 @@
 static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
+	struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
 
 	unlazy_fpu(tsk);
-	tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
-	if (__copy_to_user(buf, &tsk->thread.i387.fsave,
-			   sizeof(struct i387_fsave_struct)))
+	fp->status = fp->swd;
+	if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
 		return -1;
 	return 1;
 }
@@ -368,6 +379,7 @@
 static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
+	struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
 	struct user_i387_ia32_struct env;
 	int err = 0;
 
@@ -377,12 +389,12 @@
 	if (__copy_to_user(buf, &env, sizeof(env)))
 		return -1;
 
-	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
+	err |= __put_user(fx->swd, &buf->status);
 	err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
 	if (err)
 		return -1;
 
-	if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+	if (__copy_to_user(&buf->_fxsr_env[0], fx,
 			   sizeof(struct i387_fxsave_struct)))
 		return -1;
 	return 1;
@@ -415,7 +427,7 @@
 {
 	struct task_struct *tsk = current;
 	clear_fpu(tsk);
-	return __copy_from_user(&tsk->thread.i387.fsave, buf,
+	return __copy_from_user(&tsk->thread.xstate->fsave, buf,
 				sizeof(struct i387_fsave_struct));
 }
 
@@ -425,10 +437,10 @@
 	struct task_struct *tsk = current;
 	struct user_i387_ia32_struct env;
 	clear_fpu(tsk);
-	err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+	err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
 			       sizeof(struct i387_fxsave_struct));
 	/* mxcsr reserved bits must be masked to zero for security reasons */
-	tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+	tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 	if (err || __copy_from_user(&env, buf, sizeof(env)))
 		return 1;
 	convert_to_fxsr(tsk, &env);
Index: linux-2.6-x86/include/asm-x86/i387.h
===================================================================
--- linux-2.6-x86.orig/include/asm-x86/i387.h	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/include/asm-x86/i387.h	2008-03-03 14:13:18.000000000 -0800
@@ -23,6 +23,7 @@
 extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void init_thread_xstate(void);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
@@ -116,24 +117,22 @@
 	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
 	   starting with gas 2.16. */
 	__asm__ __volatile__("fxsaveq %0"
-			     : "=m" (tsk->thread.i387.fxsave));
+			     : "=m" (tsk->thread.xstate->fxsave));
 #elif 0
 	/* Using, as a workaround, the properly prefixed form below isn't
 	   accepted by any binutils version so far released, complaining that
 	   the same type of prefix is used twice if an extended register is
 	   needed for addressing (fix submitted to mainline 2005-11-21). */
 	__asm__ __volatile__("rex64/fxsave %0"
-			     : "=m" (tsk->thread.i387.fxsave));
+			     : "=m" (tsk->thread.xstate->fxsave));
 #else
 	/* This, however, we can work around by forcing the compiler to select
 	   an addressing mode that doesn't require extended registers. */
-	__asm__ __volatile__("rex64/fxsave %P2(%1)"
-			     : "=m" (tsk->thread.i387.fxsave)
-			     : "cdaSDb" (tsk),
-				"i" (offsetof(__typeof__(*tsk),
-					      thread.i387.fxsave)));
+	__asm__ __volatile__("rex64/fxsave (%1)"
+			     : "=m" (tsk->thread.xstate->fxsave)
+			     : "cdaSDb" (&tsk->thread.xstate->fxsave));
 #endif
-	clear_fpu_state(&tsk->thread.i387.fxsave);
+	clear_fpu_state(&tsk->thread.xstate->fxsave);
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
@@ -147,7 +146,7 @@
 	int err = 0;
 
 	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-			sizeof(tsk->thread.i387.fxsave));
+			sizeof(tsk->thread.xstate->fxsave));
 
 	if ((unsigned long)buf % 16)
 		printk("save_i387: bad fpstate %p\n", buf);
@@ -161,7 +160,7 @@
 		task_thread_info(tsk)->status &= ~TS_USEDFPU;
 		stts();
 	} else {
-		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
 				   sizeof(struct i387_fxsave_struct)))
 			return -1;
 	}
@@ -198,7 +197,7 @@
 		"nop ; frstor %1",
 		"fxrstor %1",
 		X86_FEATURE_FXSR,
-		"m" ((tsk)->thread.i387.fxsave));
+		"m" (tsk->thread.xstate->fxsave));
 }
 
 /* We need a safe address that is cheap to find and that is already
@@ -222,8 +221,8 @@
 		"fxsave %[fx]\n"
 		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
 		X86_FEATURE_FXSR,
-		[fx] "m" (tsk->thread.i387.fxsave),
-		[fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+		[fx] "m" (tsk->thread.xstate->fxsave),
+		[fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
 	   values. safe_address is a random variable that should be in L1 */
@@ -324,25 +323,25 @@
 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.cwd;
+		return tsk->thread.xstate->fxsave.cwd;
 	} else {
-		return (unsigned short)tsk->thread.i387.fsave.cwd;
+		return (unsigned short) tsk->thread.xstate->fsave.cwd;
 	}
 }
 
 static inline unsigned short get_fpu_swd(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.swd;
+		return tsk->thread.xstate->fxsave.swd;
 	} else {
-		return (unsigned short)tsk->thread.i387.fsave.swd;
+		return (unsigned short) tsk->thread.xstate->fsave.swd;
 	}
 }
 
 static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
 {
 	if (cpu_has_xmm) {
-		return tsk->thread.i387.fxsave.mxcsr;
+		return tsk->thread.xstate->fxsave.mxcsr;
 	} else {
 		return MXCSR_DEFAULT;
 	}
Index: linux-2.6-x86/include/asm-x86/processor.h
===================================================================
--- linux-2.6-x86.orig/include/asm-x86/processor.h	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/include/asm-x86/processor.h	2008-03-03 14:13:18.000000000 -0800
@@ -340,7 +340,7 @@
 	u32			entry_eip;
 };
 
-union i387_union {
+union thread_xstate {
 	struct i387_fsave_struct	fsave;
 	struct i387_fxsave_struct	fxsave;
 	struct i387_soft_struct		soft;
@@ -353,6 +353,7 @@
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
+extern unsigned int xstate_size;
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
@@ -385,8 +386,8 @@
 	unsigned long		cr2;
 	unsigned long		trap_no;
 	unsigned long		error_code;
-	/* Floating point info: */
-	union i387_union	i387 __attribute__((aligned(16)));;
+	/* floating point and extended processor state */
+	union thread_xstate	*xstate;
 #ifdef CONFIG_X86_32
 	/* Virtual 86 mode info */
 	struct vm86_struct __user *vm86_info;
Index: linux-2.6-x86/arch/x86/kernel/process_32.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/process_32.c	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/process_32.c	2008-03-03 14:13:18.000000000 -0800
@@ -672,7 +672,7 @@
 
 	/* we're going to use this soon, after a few expensive things */
 	if (next_p->fpu_counter > 5)
-		prefetch(&next->i387.fxsave);
+		prefetch(next->xstate);
 
 	/*
 	 * Reload esp0.
Index: linux-2.6-x86/arch/x86/kernel/traps_32.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/traps_32.c	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/traps_32.c	2008-03-03 14:13:18.000000000 -0800
@@ -1229,11 +1229,6 @@
 #endif
 	set_trap_gate(19, &simd_coprocessor_error);
 
-	/*
-	 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
-	 * Generate a build-time error if the alignment is wrong.
-	 */
-	BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
 	if (cpu_has_fxsr) {
 		printk(KERN_INFO "Enabling fast FPU save and restore... ");
 		set_in_cr4(X86_CR4_OSFXSR);
@@ -1254,6 +1249,7 @@
 
 	set_bit(SYSCALL_VECTOR, used_vectors);
 
+	init_thread_xstate();
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
Index: linux-2.6-x86/include/asm-x86/thread_info.h
===================================================================
--- linux-2.6-x86.orig/include/asm-x86/thread_info.h	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/include/asm-x86/thread_info.h	2008-03-03 14:13:18.000000000 -0800
@@ -1,5 +1,13 @@
+#ifndef _ASM_X86_THREAD_INFO_H
 #ifdef CONFIG_X86_32
 # include "thread_info_32.h"
 #else
 # include "thread_info_64.h"
 #endif
+
+#ifndef __ASSEMBLY__
+extern void arch_task_cache_init(void);
+extern void free_thread_info(struct thread_info *ti);
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+#endif
+#endif /* _ASM_X86_THREAD_INFO_H */
Index: linux-2.6-x86/arch/x86/kernel/Makefile
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/Makefile	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/Makefile	2008-03-03 14:13:18.000000000 -0800
@@ -29,6 +29,7 @@
 obj-$(CONFIG_X86_64)	+= pci-nommu_64.o bugs_64.o
 obj-y			+= tsc_$(BITS).o io_delay.o rtc.o
 
+obj-y				+= process.o
 obj-y				+= i387.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
Index: linux-2.6-x86/arch/x86/kernel/process.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-x86/arch/x86/kernel/process.c	2008-03-03 14:13:18.000000000 -0800
@@ -0,0 +1,35 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+static struct kmem_cache *task_xstate_cachep;
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	*dst = *src;
+	dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
+	if (!dst->thread.xstate)
+		return -ENOMEM;
+	WARN_ON((unsigned long)dst->thread.xstate & 15);
+	memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
+	return 0;
+}
+
+void free_thread_info(struct thread_info *ti)
+{
+	kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate);
+	ti->task->thread.xstate = NULL;
+
+	free_pages((unsigned long)(ti), get_order(THREAD_SIZE));
+}
+
+void arch_task_cache_init(void)
+{
+        task_xstate_cachep =
+        	kmem_cache_create("task_xstate", xstate_size,
+				  __alignof__(union thread_xstate),
+				  SLAB_PANIC, NULL);
+}
Index: linux-2.6-x86/arch/x86/math-emu/fpu_entry.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/math-emu/fpu_entry.c	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/math-emu/fpu_entry.c	2008-03-03 14:13:18.000000000 -0800
@@ -677,7 +677,7 @@
 		    unsigned int pos, unsigned int count,
 		    const void *kbuf, const void __user *ubuf)
 {
-	struct i387_soft_struct *s387 = &target->thread.i387.soft;
+	struct i387_soft_struct *s387 = &target->thread.xstate->soft;
 	void *space = s387->st_space;
 	int ret;
 	int offset, other, i, tags, regnr, tag, newtop;
@@ -729,7 +729,7 @@
 		    unsigned int pos, unsigned int count,
 		    void *kbuf, void __user *ubuf)
 {
-	struct i387_soft_struct *s387 = &target->thread.i387.soft;
+	struct i387_soft_struct *s387 = &target->thread.xstate->soft;
 	const void *space = s387->st_space;
 	int ret;
 	int offset = (S387->ftop & 7) * 10, other = 80 - offset;
Index: linux-2.6-x86/arch/x86/math-emu/fpu_system.h
===================================================================
--- linux-2.6-x86.orig/arch/x86/math-emu/fpu_system.h	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/math-emu/fpu_system.h	2008-03-03 14:13:18.000000000 -0800
@@ -35,8 +35,8 @@
 #define SEG_EXPAND_DOWN(s)	(((s).b & ((1 << 11) | (1 << 10))) \
 				 == (1 << 10))
 
-#define I387			(current->thread.i387)
-#define FPU_info		(I387.soft.info)
+#define I387			(current->thread.xstate)
+#define FPU_info		(I387->soft.info)
 
 #define FPU_CS			(*(unsigned short *) &(FPU_info->___cs))
 #define FPU_SS			(*(unsigned short *) &(FPU_info->___ss))
@@ -46,25 +46,25 @@
 #define FPU_EIP			(FPU_info->___eip)
 #define FPU_ORIG_EIP		(FPU_info->___orig_eip)
 
-#define FPU_lookahead           (I387.soft.lookahead)
+#define FPU_lookahead           (I387->soft.lookahead)
 
 /* nz if ip_offset and cs_selector are not to be set for the current
    instruction. */
-#define no_ip_update		(*(u_char *)&(I387.soft.no_update))
-#define FPU_rm			(*(u_char *)&(I387.soft.rm))
+#define no_ip_update		(*(u_char *)&(I387->soft.no_update))
+#define FPU_rm			(*(u_char *)&(I387->soft.rm))
 
 /* Number of bytes of data which can be legally accessed by the current
    instruction. This only needs to hold a number <= 108, so a byte will do. */
-#define access_limit		(*(u_char *)&(I387.soft.alimit))
+#define access_limit		(*(u_char *)&(I387->soft.alimit))
 
-#define partial_status		(I387.soft.swd)
-#define control_word		(I387.soft.cwd)
-#define fpu_tag_word		(I387.soft.twd)
-#define registers		(I387.soft.st_space)
-#define top			(I387.soft.ftop)
+#define partial_status		(I387->soft.swd)
+#define control_word		(I387->soft.cwd)
+#define fpu_tag_word		(I387->soft.twd)
+#define registers		(I387->soft.st_space)
+#define top			(I387->soft.ftop)
 
-#define instruction_address	(*(struct address *)&I387.soft.fip)
-#define operand_address		(*(struct address *)&I387.soft.foo)
+#define instruction_address	(*(struct address *)&I387->soft.fip)
+#define operand_address		(*(struct address *)&I387->soft.foo)
 
 #define FPU_access_ok(x,y,z)	if ( !access_ok(x,y,z) ) \
 				math_abort(FPU_info,SIGSEGV)
Index: linux-2.6-x86/arch/x86/math-emu/reg_ld_str.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/math-emu/reg_ld_str.c	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/arch/x86/math-emu/reg_ld_str.c	2008-03-03 14:13:18.000000000 -0800
@@ -1185,8 +1185,8 @@
 		control_word |= 0xffff0040;
 		partial_status = status_word() | 0xffff0000;
 		fpu_tag_word |= 0xffff0000;
-		I387.soft.fcs &= ~0xf8000000;
-		I387.soft.fos |= 0xffff0000;
+		I387->soft.fcs &= ~0xf8000000;
+		I387->soft.fos |= 0xffff0000;
 #endif /* PECULIAR_486 */
 		if (__copy_to_user(d, &control_word, 7 * 4))
 			FPU_abort;
Index: linux-2.6-x86/include/asm-x86/thread_info_32.h
===================================================================
--- linux-2.6-x86.orig/include/asm-x86/thread_info_32.h	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/include/asm-x86/thread_info_32.h	2008-03-03 14:13:18.000000000 -0800
@@ -102,8 +102,6 @@
 	__get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
 #endif
 
-#define free_thread_info(info)	free_pages((unsigned long)(info), get_order(THREAD_SIZE))
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
Index: linux-2.6-x86/include/asm-x86/thread_info_64.h
===================================================================
--- linux-2.6-x86.orig/include/asm-x86/thread_info_64.h	2008-03-03 14:11:52.000000000 -0800
+++ linux-2.6-x86/include/asm-x86/thread_info_64.h	2008-03-03 14:13:18.000000000 -0800
@@ -85,8 +85,6 @@
 #define alloc_thread_info(tsk) \
 	((struct thread_info *) __get_free_pages(THREAD_FLAGS, THREAD_ORDER))
 
-#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */

-- 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-03 23:02 [patch 1/2] x86, fpu: split FPU state from task struct - v3 Suresh Siddha
@ 2008-03-03 23:02 ` Suresh Siddha
  2008-03-04  1:20   ` Christoph Hellwig
  2008-03-05 19:48   ` Pavel Machek
  2008-03-04  1:18 ` [patch 1/2] x86, fpu: split FPU state from task struct " Christoph Hellwig
  2008-03-04 10:28 ` Ingo Molnar
  2 siblings, 2 replies; 26+ messages in thread
From: Suresh Siddha @ 2008-03-03 23:02 UTC (permalink / raw)
  To: mingo, hpa, tglx, andi, hch; +Cc: linux-kernel, Suresh Siddha, Arjan van de Ven

[-- Attachment #1: x86-lazy-fp-allocation.patch --]
[-- Type: text/plain, Size: 5394 bytes --]

Only allocate the FPU area when the application actually uses the FPU, i.e., in
the first lazy FPU trap. This could save memory for non-FPU-using apps.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
---
v3: Fixed the non-atomic calling sequence in atomic context.
v2: Ported to x86.git#testing with some name changes.
---

Index: linux-2.6-x86/arch/x86/kernel/i387.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/i387.c	2008-03-03 14:15:17.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/i387.c	2008-03-03 14:15:17.000000000 -0800
@@ -9,7 +9,6 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/regset.h>
-#include <linux/bootmem.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/math_emu.h>
@@ -67,7 +66,6 @@
 	else
 		xstate_size = sizeof(struct i387_fsave_struct);
 #endif
-	init_task.thread.xstate = alloc_bootmem(xstate_size);
 }
 
 #ifdef CONFIG_X86_64
@@ -105,6 +103,12 @@
 		return;
 	}
 
+	/*
+	 * Memory allocation at the first usage of the FPU and other state.
+	 */
+	if (!tsk->thread.xstate)
+		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
+
 	if (cpu_has_fxsr) {
 		struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
 
Index: linux-2.6-x86/arch/x86/kernel/process.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/process.c	2008-03-03 14:15:17.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/process.c	2008-03-03 14:15:17.000000000 -0800
@@ -5,24 +5,33 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-static struct kmem_cache *task_xstate_cachep;
+struct kmem_cache *task_xstate_cachep;
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	*dst = *src;
-	dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
-	if (!dst->thread.xstate)
-		return -ENOMEM;
-	WARN_ON((unsigned long)dst->thread.xstate & 15);
-	memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
+	if (src->thread.xstate) {
+		dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
+		if (!dst->thread.xstate)
+			return -ENOMEM;
+		WARN_ON((unsigned long)dst->thread.xstate & 15);
+		memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
+	}
 	return 0;
 }
 
-void free_thread_info(struct thread_info *ti)
+void free_thread_xstate(struct task_struct *tsk)
 {
-	kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate);
-	ti->task->thread.xstate = NULL;
+	if (tsk->thread.xstate) {
+		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
+		tsk->thread.xstate = NULL;
+	}
+}
+
 
+void free_thread_info(struct thread_info *ti)
+{
+	free_thread_xstate(ti->task);
 	free_pages((unsigned long)(ti), get_order(THREAD_SIZE));
 }
 
Index: linux-2.6-x86/include/asm-x86/processor.h
===================================================================
--- linux-2.6-x86.orig/include/asm-x86/processor.h	2008-03-03 14:15:17.000000000 -0800
+++ linux-2.6-x86/include/asm-x86/processor.h	2008-03-03 14:15:17.000000000 -0800
@@ -354,6 +354,8 @@
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int xstate_size;
+extern void free_thread_xstate(struct task_struct *);
+extern struct kmem_cache *task_xstate_cachep;
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
Index: linux-2.6-x86/arch/x86/kernel/process_32.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/process_32.c	2008-03-03 14:15:17.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/process_32.c	2008-03-03 14:15:17.000000000 -0800
@@ -524,6 +524,10 @@
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
+	/*
+	 * Free the old FP and other extended state
+	 */
+	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
Index: linux-2.6-x86/arch/x86/kernel/process_64.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/process_64.c	2008-03-03 14:15:17.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/process_64.c	2008-03-03 14:15:17.000000000 -0800
@@ -552,6 +552,10 @@
 	regs->ss		= __USER_DS;
 	regs->flags		= 0x200;
 	set_fs(USER_DS);
+	/*
+	 * Free the old FP and other extended state
+	 */
+	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
Index: linux-2.6-x86/arch/x86/kernel/traps_32.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/traps_32.c	2008-03-03 14:15:17.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/traps_32.c	2008-03-03 14:17:28.000000000 -0800
@@ -1169,9 +1169,20 @@
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
 
-	clts();				/* Allow maths ops (or we recurse) */
-	if (!tsk_used_math(tsk))
+	if (!tsk_used_math(tsk)) {
+#ifdef CONFIG_PREEMPT
+		local_irq_enable();
+#endif
+		/*
+		 * does a slab alloc which can sleep
+		 */
 		init_fpu(tsk);
+#ifdef CONFIG_PREEMPT
+		local_irq_disable();
+#endif
+	}
+
+	clts();				/* Allow maths ops (or we recurse) */
 	restore_fpu(tsk);
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;

-- 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 1/2] x86, fpu: split FPU state from task struct - v3
  2008-03-03 23:02 [patch 1/2] x86, fpu: split FPU state from task struct - v3 Suresh Siddha
  2008-03-03 23:02 ` [patch 2/2] x86, fpu: lazy allocation of FPU area " Suresh Siddha
@ 2008-03-04  1:18 ` Christoph Hellwig
  2008-03-04  1:36   ` Suresh Siddha
  2008-03-04 10:28 ` Ingo Molnar
  2 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2008-03-04  1:18 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: mingo, hpa, tglx, andi, hch, linux-kernel, Arjan van de Ven

On Mon, Mar 03, 2008 at 03:02:45PM -0800, Suresh Siddha wrote:
> +void __attribute__((weak)) arch_task_cache_init(void)
> +{
> +}
> +
>  void __init fork_init(unsigned long mempages)
>  {
>  #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
> @@ -144,6 +148,9 @@
>  			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
>  #endif
>  
> +	/* do the arch specific task caches init */
> +	arch_task_cache_init();

Why can't this just be a normal initcall (with the right level)?

> +int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
> +					       struct task_struct *src)
> +{
> +	*dst = *src;
> +	return 0;
> +}
> +
>  static struct task_struct *dup_task_struct(struct task_struct *orig)
>  {
>  	struct task_struct *tsk;
> @@ -181,15 +195,15 @@
>  		return NULL;
>  	}
>  
> -	*tsk = *orig;
> + 	err = arch_dup_task_struct(tsk, orig);
> +	if (err)
> +		goto out;
> +

You're still adding a second hook instead of re-using or re-naming
setup_thread_stack.  Did I miss a good explanation for that
or was this just an oversight?

Also this weak linkage stuff creeping in is really ugly.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-03 23:02 ` [patch 2/2] x86, fpu: lazy allocation of FPU area " Suresh Siddha
@ 2008-03-04  1:20   ` Christoph Hellwig
  2008-03-04  1:43     ` Suresh Siddha
  2008-03-05 19:48   ` Pavel Machek
  1 sibling, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2008-03-04  1:20 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: mingo, hpa, tglx, andi, hch, linux-kernel, Arjan van de Ven

On Mon, Mar 03, 2008 at 03:02:46PM -0800, Suresh Siddha wrote:
> +	/*
> +	 * Memory allocation at the first usage of the FPU and other state.
> +	 */
> +	if (!tsk->thread.xstate)
> +		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);

Please don't do over 80 char lines.  Also don't we need some kind of
error handling here?


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 1/2] x86, fpu: split FPU state from task struct - v3
  2008-03-04  1:18 ` [patch 1/2] x86, fpu: split FPU state from task struct " Christoph Hellwig
@ 2008-03-04  1:36   ` Suresh Siddha
  2008-03-04  8:20     ` Ingo Molnar
  2008-03-06 12:39     ` Christoph Hellwig
  0 siblings, 2 replies; 26+ messages in thread
From: Suresh Siddha @ 2008-03-04  1:36 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Suresh Siddha, mingo, hpa, tglx, andi, linux-kernel, Arjan van de Ven

On Mon, Mar 03, 2008 at 08:18:49PM -0500, Christoph Hellwig wrote:
> On Mon, Mar 03, 2008 at 03:02:45PM -0800, Suresh Siddha wrote:
> > +void __attribute__((weak)) arch_task_cache_init(void)
> > +{
> > +}
> > +
> >  void __init fork_init(unsigned long mempages)
> >  {
> >  #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
> > @@ -144,6 +148,9 @@
> >  			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
> >  #endif
> >  
> > +	/* do the arch specific task caches init */
> > +	arch_task_cache_init();
> 
> Why can't this just be a normal initcall (with the right level)?

This is sort of an extension to the per-task area. And this needs to be done
before any task starts using this state. I thought this is a nice place to
initialize the extension caches along with the main task_struct init.

> 
> > +int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
> > +					       struct task_struct *src)
> > +{
> > +	*dst = *src;
> > +	return 0;
> > +}
> > +
> >  static struct task_struct *dup_task_struct(struct task_struct *orig)
> >  {
> >  	struct task_struct *tsk;
> > @@ -181,15 +195,15 @@
> >  		return NULL;
> >  	}
> >  
> > -	*tsk = *orig;
> > + 	err = arch_dup_task_struct(tsk, orig);
> > +	if (err)
> > +		goto out;
> > +
> 
> You're still adding a second hook instead of re-using or re-naming
> setup_thread_stack.  Did I miss a good explanation for that
> or was this just an oversight?

Not an oversight. setup_thread_stack comes with its own baggage called
__HAVE_THREAD_FUNCTIONS. Thought of keeping this simple and separate
by using an inline or weak linkage.

> Also this weak linkage stuff creaping in is really ugly.

hmm.. any better suggestion?

thanks,
suresh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-04  1:20   ` Christoph Hellwig
@ 2008-03-04  1:43     ` Suresh Siddha
  2008-03-04 10:32       ` Ingo Molnar
  0 siblings, 1 reply; 26+ messages in thread
From: Suresh Siddha @ 2008-03-04  1:43 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Suresh Siddha, mingo, hpa, tglx, andi, linux-kernel, Arjan van de Ven

On Mon, Mar 03, 2008 at 08:20:12PM -0500, Christoph Hellwig wrote:
> On Mon, Mar 03, 2008 at 03:02:46PM -0800, Suresh Siddha wrote:
> > +	/*
> > +	 * Memory allocation at the first usage of the FPU and other state.
> > +	 */
> > +	if (!tsk->thread.xstate)
> > +		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
> 
> Please don't do over 80 char lines.

Ok.

>  Also don't we need some kind of error handling here?

Currently it uses SLAB_PANIC.

thanks,
suresh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 1/2] x86, fpu: split FPU state from task struct - v3
  2008-03-04  1:36   ` Suresh Siddha
@ 2008-03-04  8:20     ` Ingo Molnar
  2008-03-06 12:39     ` Christoph Hellwig
  1 sibling, 0 replies; 26+ messages in thread
From: Ingo Molnar @ 2008-03-04  8:20 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Christoph Hellwig, hpa, tglx, andi, linux-kernel, Arjan van de Ven


* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> > Also this weak linkage stuff creaping in is really ugly.
> 
> hmm.. any better suggestion?

i dont find it ugly at all - it's far easier to read than an #ifdef 
maze. And the fact that it's marked "weak" is enough of a visual warning 
IMO that this is just default behavior and that the arch might have 
overridden it.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 1/2] x86, fpu: split FPU state from task struct - v3
  2008-03-03 23:02 [patch 1/2] x86, fpu: split FPU state from task struct - v3 Suresh Siddha
  2008-03-03 23:02 ` [patch 2/2] x86, fpu: lazy allocation of FPU area " Suresh Siddha
  2008-03-04  1:18 ` [patch 1/2] x86, fpu: split FPU state from task struct " Christoph Hellwig
@ 2008-03-04 10:28 ` Ingo Molnar
  2008-03-04 17:59   ` Suresh Siddha
  2 siblings, 1 reply; 26+ messages in thread
From: Ingo Molnar @ 2008-03-04 10:28 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: hpa, tglx, andi, hch, linux-kernel, Arjan van de Ven


* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> Split the FPU save area from the task struct. This allows easy 
> migration of FPU context, and it's generally cleaner. It also allows 
> the following two optimizations:

hm, i didnt get a reply from you to:

    http://lkml.org/lkml/2008/3/3/47

I'm uneasy to apply patches without knowing whether previous problems 
are fixed. In the changelog there's a mention of:

  v3: Fixed the non-atomic calling sequence in atomic context.

is that the same bug?

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-04  1:43     ` Suresh Siddha
@ 2008-03-04 10:32       ` Ingo Molnar
  2008-03-04 17:55         ` Suresh Siddha
  0 siblings, 1 reply; 26+ messages in thread
From: Ingo Molnar @ 2008-03-04 10:32 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Christoph Hellwig, hpa, tglx, andi, linux-kernel, Arjan van de Ven


* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> On Mon, Mar 03, 2008 at 08:20:12PM -0500, Christoph Hellwig wrote:
> > On Mon, Mar 03, 2008 at 03:02:46PM -0800, Suresh Siddha wrote:
> > > +	/*
> > > +	 * Memory allocation at the first usage of the FPU and other state.
> > > +	 */
> > > +	if (!tsk->thread.xstate)
> > > +		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
> >
> >  Also don't we need some kind of error handling here?
> 
> Currently it uses SLAB_PANIC.

but SLAB_PANIC only covers kmem_cache_create() failures. 

kmem_cache_alloc() can fail (return NULL) and not handling it is a bug.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-04 10:32       ` Ingo Molnar
@ 2008-03-04 17:55         ` Suresh Siddha
  2008-03-05 19:47           ` Pavel Machek
  0 siblings, 1 reply; 26+ messages in thread
From: Suresh Siddha @ 2008-03-04 17:55 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Suresh Siddha, Christoph Hellwig, hpa, tglx, andi, linux-kernel,
	Arjan van de Ven

On Tue, Mar 04, 2008 at 11:32:20AM +0100, Ingo Molnar wrote:
> 
> * Suresh Siddha <suresh.b.siddha@intel.com> wrote:
> 
> > On Mon, Mar 03, 2008 at 08:20:12PM -0500, Christoph Hellwig wrote:
> > > On Mon, Mar 03, 2008 at 03:02:46PM -0800, Suresh Siddha wrote:
> > > > +	/*
> > > > +	 * Memory allocation at the first usage of the FPU and other state.
> > > > +	 */
> > > > +	if (!tsk->thread.xstate)
> > > > +		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
> > >
> > >  Also don't we need some kind of error handling here?
> > 
> > Currently it uses SLAB_PANIC.
> 
> but SLAB_PANIC only covers kmem_cache_create() failures. 
> 
> kmem_cache_alloc() can fail (return NULL) and not handling it is a bug.

oops. you are correct. Will send a sigsegv in the failure case then. Thanks.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 1/2] x86, fpu: split FPU state from task struct - v3
  2008-03-04 10:28 ` Ingo Molnar
@ 2008-03-04 17:59   ` Suresh Siddha
  2008-03-04 20:53     ` Ingo Molnar
  0 siblings, 1 reply; 26+ messages in thread
From: Suresh Siddha @ 2008-03-04 17:59 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Suresh Siddha, hpa, tglx, andi, hch, linux-kernel, Arjan van de Ven

On Tue, Mar 04, 2008 at 11:28:04AM +0100, Ingo Molnar wrote:
> 
> * Suresh Siddha <suresh.b.siddha@intel.com> wrote:
> 
> > Split the FPU save area from the task struct. This allows easy 
> > migration of FPU context, and it's generally cleaner. It also allows 
> > the following two optimizations:
> 
> hm, i didnt get a reply from you to:
> 
>     http://lkml.org/lkml/2008/3/3/47
> 
> I'm uneasy to apply patches without knowing whether previous problems 
> are fixed. In the changelog there's a mention of:
> 
>   v3: Fixed the non-atomic calling sequence in atomic context.
> 
> is that the same bug?

Yes. Sorry for not pointing out explicitly. Essentially 32bit kernel with
CONFIG_PREEMPT is calling math_state_restore() with interrupts disabled.
Handled this by enabling/disabling the interrupts around the blocking call,
with small code changes ensuring that it is safe to enable/disable interrupts
at this point.

Do you want a v4 or a separate patch for handling the kmem_cache_alloc() failure?

thanks,
suresh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 1/2] x86, fpu: split FPU state from task struct - v3
  2008-03-04 17:59   ` Suresh Siddha
@ 2008-03-04 20:53     ` Ingo Molnar
  0 siblings, 0 replies; 26+ messages in thread
From: Ingo Molnar @ 2008-03-04 20:53 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: hpa, tglx, andi, hch, linux-kernel, Arjan van de Ven


* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> >   v3: Fixed the non-atomic calling sequence in atomic context.
> > 
> > is that the same bug?
> 
> Yes. Sorry for not pointing out explicitly. Essentially 32bit kernel 
> with CONFIG_PREEMPT is calling math_state_restore() with interrupts 
> disabled. Handled this by enabling/disabling the interrupts around the 
> blocking call. with small code changes ensuring that it is safe to 
> enable/disable interrupts at this point.
> 
> Do you want a v4 or separate patch for handling the kmem_cache_alloc() 
> failure

would be nice to resend a v4 - i couldnt pick v3 up due to the pending 
questions. (the NULL thing was quite serious)

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-04 17:55         ` Suresh Siddha
@ 2008-03-05 19:47           ` Pavel Machek
  2008-03-06 15:51             ` Ingo Molnar
  0 siblings, 1 reply; 26+ messages in thread
From: Pavel Machek @ 2008-03-05 19:47 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Ingo Molnar, Christoph Hellwig, hpa, tglx, andi, linux-kernel,
	Arjan van de Ven

On Tue 2008-03-04 09:55:28, Suresh Siddha wrote:
> On Tue, Mar 04, 2008 at 11:32:20AM +0100, Ingo Molnar wrote:
> > 
> > * Suresh Siddha <suresh.b.siddha@intel.com> wrote:
> > 
> > > On Mon, Mar 03, 2008 at 08:20:12PM -0500, Christoph Hellwig wrote:
> > > > On Mon, Mar 03, 2008 at 03:02:46PM -0800, Suresh Siddha wrote:
> > > > > +	/*
> > > > > +	 * Memory allocation at the first usage of the FPU and other state.
> > > > > +	 */
> > > > > +	if (!tsk->thread.xstate)
> > > > > +		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
> > > >
> > > >  Also don't we need some kind of error handling here?
> > > 
> > > Currently it uses SLAB_PANIC.
> > 
> > but SLAB_PANIC only covers kmem_cache_create() failures. 
> > 
> > kmem_cache_alloc() can fail (return NULL) and not handling it is a bug.
> 
> oops. you are correct. Will send a sigsegv in the failure case then. Thanks.

You are introducing the possibility of a hard-to-debug error, where the previous
code just worked... Does not look like a good idea to me.
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-03 23:02 ` [patch 2/2] x86, fpu: lazy allocation of FPU area " Suresh Siddha
  2008-03-04  1:20   ` Christoph Hellwig
@ 2008-03-05 19:48   ` Pavel Machek
  2008-03-06 19:26     ` Suresh Siddha
  1 sibling, 1 reply; 26+ messages in thread
From: Pavel Machek @ 2008-03-05 19:48 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: mingo, hpa, tglx, andi, hch, linux-kernel, Arjan van de Ven

On Mon 2008-03-03 15:02:46, Suresh Siddha wrote:
> Only allocate the FPU area when the application actually uses FPU, i.e., in the
> first lazy FPU trap. This could save memory for non-fpu using apps.

How many such apps are on your system, and how much does this
'optimization' cost?

ISTR glibc always using FPU...?
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 1/2] x86, fpu: split FPU state from task struct - v3
  2008-03-04  1:36   ` Suresh Siddha
  2008-03-04  8:20     ` Ingo Molnar
@ 2008-03-06 12:39     ` Christoph Hellwig
  1 sibling, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2008-03-06 12:39 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Christoph Hellwig, mingo, hpa, tglx, andi, linux-kernel,
	Arjan van de Ven

On Mon, Mar 03, 2008 at 05:36:49PM -0800, Suresh Siddha wrote:
> > You're still adding a second hook instead of re-using or re-naming
> > setup_thread_stack.  Did I miss a good explanation for that
> > or was this just an oversight?
> 
> Not an oversight. setup_thread_stack comes with its own baggage called
> __HAVE_THREAD_FUNCTIONS. Thought of keeping this simple and separate
> by using an inline or weak linkage.

Well, just switching setup_thread_stack to use whatever you use for your
current callout fixes that.  I really don't want to arch callout doing
the same thing in almost the same place, this leads to twisted and
unreadable code.  Please fix this last bit up and you'll get my full
ACK.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-05 19:47           ` Pavel Machek
@ 2008-03-06 15:51             ` Ingo Molnar
  2008-03-06 19:10               ` Suresh Siddha
  2008-03-06 20:24               ` Pavel Machek
  0 siblings, 2 replies; 26+ messages in thread
From: Ingo Molnar @ 2008-03-06 15:51 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Suresh Siddha, Christoph Hellwig, hpa, tglx, andi, linux-kernel,
	Arjan van de Ven


* Pavel Machek <pavel@ucw.cz> wrote:

> > > kmem_cache_alloc() can fail (return NULL) and not handling it is a 
> > > bug.
> > 
> > oops. you are correct. Will send a sigsegv in the failure case then. 
> > Thanks.
> 
> You are introducing possibility of hard to debug error, where previous 
> code just worked... Does not look like good idea to me.

hm, how does it differ from any other allocation failure? We could fail 
to allocate a pagetable page. We could fail to allocate the task_struct 
to begin with.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-06 15:51             ` Ingo Molnar
@ 2008-03-06 19:10               ` Suresh Siddha
  2008-03-06 20:24               ` Pavel Machek
  1 sibling, 0 replies; 26+ messages in thread
From: Suresh Siddha @ 2008-03-06 19:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pavel Machek, Suresh Siddha, Christoph Hellwig, hpa, tglx, andi,
	linux-kernel, Arjan van de Ven

On Thu, Mar 06, 2008 at 04:51:41PM +0100, Ingo Molnar wrote:
> 
> * Pavel Machek <pavel@ucw.cz> wrote:
> 
> > > > kmem_cache_alloc() can fail (return NULL) and not handling it is a 
> > > > bug.
> > > 
> > > oops. you are correct. Will send a sigsegv in the failure case then. 
> > > Thanks.
> > 
> > You are introducing possibility of hard to debug error, where previous 
> > code just worked... Does not look like good idea to me.
> 
> hm, how does it differ from any other allocation failure? We could fail 
> to allocate a pagetable page. We could fail to allocate the task_struct 
> to begin with.

Yes. This happens under out of memory conditions. And we are also using
GFP_KERNEL here, which can block.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-05 19:48   ` Pavel Machek
@ 2008-03-06 19:26     ` Suresh Siddha
  2008-03-06 21:21       ` Pavel Machek
  0 siblings, 1 reply; 26+ messages in thread
From: Suresh Siddha @ 2008-03-06 19:26 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Suresh Siddha, mingo, hpa, tglx, andi, hch, linux-kernel,
	Arjan van de Ven

On Wed, Mar 05, 2008 at 08:48:01PM +0100, Pavel Machek wrote:
> On Mon 2008-03-03 15:02:46, Suresh Siddha wrote:
> > Only allocate the FPU area when the application actually uses FPU, i.e., in the
> > first lazy FPU trap. This could save memory for non-fpu using apps.
> 
> How many such apps are on your system, and how much does this
> 'optimalization' cost?

On a normal kernel boot, where there are 200 or so tasks running, only 20
or so apps seem to be using FPU.

> ISTR glibc always using FPU...?

Apparently not, at least not on my system. And also going forward, as we extend
this state, I thought it is nice to have.

thanks,
suresh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-06 15:51             ` Ingo Molnar
  2008-03-06 19:10               ` Suresh Siddha
@ 2008-03-06 20:24               ` Pavel Machek
  2008-03-06 20:52                 ` Andi Kleen
  2008-03-07 12:29                 ` H. Peter Anvin
  1 sibling, 2 replies; 26+ messages in thread
From: Pavel Machek @ 2008-03-06 20:24 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Suresh Siddha, Christoph Hellwig, hpa, tglx, andi, linux-kernel,
	Arjan van de Ven

On Thu 2008-03-06 16:51:41, Ingo Molnar wrote:
> 
> * Pavel Machek <pavel@ucw.cz> wrote:
> 
> > > > kmem_cache_alloc() can fail (return NULL) and not handling it is a 
> > > > bug.
> > > 
> > > oops. you are correct. Will send a sigsegv in the failure case then. 
> > > Thanks.
> > 
> > You are introducing possibility of hard to debug error, where previous 
> > code just worked... Does not look like good idea to me.
> 
> hm, how does it differ from any other allocation failure? We could fail 

Well, we should not be sending SIGSEGV...? SIGBUS would be cleaner, or
SIGKILL... what happens when userland tries to catch this one?

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-06 20:24               ` Pavel Machek
@ 2008-03-06 20:52                 ` Andi Kleen
  2008-03-07 12:29                 ` H. Peter Anvin
  1 sibling, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2008-03-06 20:52 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Ingo Molnar, Suresh Siddha, Christoph Hellwig, hpa, tglx,
	linux-kernel, Arjan van de Ven

Pavel Machek <pavel@ucw.cz> writes:
> 
> Well, we should not be sending SIGSEGV...? SIGBUS would be cleaner, or
> SIGKILL... what happens when userland tries to catch this one?

When this happens the kernel is already in a severe out of memory
situation and no matter what you do user land will not be able
to handle this well.

-Andi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-06 19:26     ` Suresh Siddha
@ 2008-03-06 21:21       ` Pavel Machek
  0 siblings, 0 replies; 26+ messages in thread
From: Pavel Machek @ 2008-03-06 21:21 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: mingo, hpa, tglx, andi, hch, linux-kernel, Arjan van de Ven

On Thu 2008-03-06 11:26:53, Suresh Siddha wrote:
> On Wed, Mar 05, 2008 at 08:48:01PM +0100, Pavel Machek wrote:
> > On Mon 2008-03-03 15:02:46, Suresh Siddha wrote:
> > > Only allocate the FPU area when the application actually uses FPU, i.e., in the
> > > first lazy FPU trap. This could save memory for non-fpu using apps.
> > 
> > How many such apps are on your system, and how much does this
> > 'optimalization' cost?
> 
> On a normal kernel boot, where there are 200 or so tasks running, only 20
> or so apps seem to be using FPU.

Aha, now I see it is useful ;-).
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-06 20:24               ` Pavel Machek
  2008-03-06 20:52                 ` Andi Kleen
@ 2008-03-07 12:29                 ` H. Peter Anvin
  2008-03-07 13:06                   ` Arjan van de Ven
  1 sibling, 1 reply; 26+ messages in thread
From: H. Peter Anvin @ 2008-03-07 12:29 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Ingo Molnar, Suresh Siddha, Christoph Hellwig, tglx, andi,
	linux-kernel, Arjan van de Ven

Pavel Machek wrote:
> On Thu 2008-03-06 16:51:41, Ingo Molnar wrote:
>> * Pavel Machek <pavel@ucw.cz> wrote:
>>
>>>>> kmem_cache_alloc() can fail (return NULL) and not handling it is a 
>>>>> bug.
>>>> oops. you are correct. Will send a sigsegv in the failure case then. 
>>>> Thanks.
>>> You are introducing possibility of hard to debug error, where previous 
>>> code just worked... Does not look like good idea to me.
>> hm, how does it differ from any other allocation failure? We could fail 
> 
> Well, we should not be sending SIGSEGV...? SIGBUS would be cleaner, or
> SIGKILL... what happens when userland tries to catch this one?
> 

I'm confused...

Normally when we need memory for userspace and can't get it, we put the 
process to sleep until memory is available.

Why is this different in any way?

	-hpa

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-07 12:29                 ` H. Peter Anvin
@ 2008-03-07 13:06                   ` Arjan van de Ven
  2008-03-07 13:18                     ` Andi Kleen
  0 siblings, 1 reply; 26+ messages in thread
From: Arjan van de Ven @ 2008-03-07 13:06 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Pavel Machek, Ingo Molnar, Suresh Siddha, Christoph Hellwig,
	tglx, andi, linux-kernel

H. Peter Anvin wrote:
> Pavel Machek wrote:
>> On Thu 2008-03-06 16:51:41, Ingo Molnar wrote:
>>> * Pavel Machek <pavel@ucw.cz> wrote:
>>>
>>>>>> kmem_cache_alloc() can fail (return NULL) and not handling it is a 
>>>>>> bug.
>>>>> oops. you are correct. Will send a sigsegv in the failure case 
>>>>> then. Thanks.
>>>> You are introducing possibility of hard to debug error, where 
>>>> previous code just worked... Does not look like good idea to me.
>>> hm, how does it differ from any other allocation failure? We could fail 
>>
>> Well, we should not be sending SIGSEGV...? SIGBUS would be cleaner, or
>> SIGKILL... what happens when userland tries to catch this one?
>>
> 
> I'm confused...
> 
> Normally when we need memory for userspace and can't get it, we put the 
> process to sleep until memory is available.

that's what GFP_KERNEL does
> 
> Why is this different in any way?

this is just for handling the case where that fails
(basically near/totally OOM or the case where you get a fatal signal)

maybe we need a GFP_KILLABLE now that we have a TASK_KILLABLE...


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-07 13:06                   ` Arjan van de Ven
@ 2008-03-07 13:18                     ` Andi Kleen
  2008-03-07 13:20                       ` Arjan van de Ven
  0 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2008-03-07 13:18 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: H. Peter Anvin, Pavel Machek, Ingo Molnar, Suresh Siddha,
	Christoph Hellwig, tglx, andi, linux-kernel

> this is just for handling the case where that fails
> (basically near/totally OOM or the case where you get a fatal signal)

I didn't think GFP_KERNEL was interruptible by signals...
(although sometimes under oom thrashing I think it would be great if it was...) 

-Andi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-07 13:18                     ` Andi Kleen
@ 2008-03-07 13:20                       ` Arjan van de Ven
  2008-03-07 13:27                         ` Andi Kleen
  0 siblings, 1 reply; 26+ messages in thread
From: Arjan van de Ven @ 2008-03-07 13:20 UTC (permalink / raw)
  To: Andi Kleen
  Cc: H. Peter Anvin, Pavel Machek, Ingo Molnar, Suresh Siddha,
	Christoph Hellwig, tglx, linux-kernel

Andi Kleen wrote:
>> this is just for handling the case where that fails
>> (basically near/totally OOM or the case where you get a fatal signal)
> 
> I didn't think GFP_KERNEL was interruptible by signals...
> (although sometimes under oom thrashing I think it would be great if it was...) 

we need to make it (or with GFP_KILLABLE); would make total sense...
(so yeah it was more wishful thinking than reality)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v3
  2008-03-07 13:20                       ` Arjan van de Ven
@ 2008-03-07 13:27                         ` Andi Kleen
  0 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2008-03-07 13:27 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Andi Kleen, H. Peter Anvin, Pavel Machek, Ingo Molnar,
	Suresh Siddha, Christoph Hellwig, tglx, linux-kernel

On Fri, Mar 07, 2008 at 07:20:44AM -0600, Arjan van de Ven wrote:
> Andi Kleen wrote:
> >>this is just for handling the case where that fails
> >>(basically near/totally OOM or the case where you get a fatal signal)
> >
> >I didn't think GFP_KERNEL was interruptible by signals...
> >(although sometimes under oom thrashing I think it would be great if it 
> >was...) 
> 
> we need to make it (or with GFP_KILLABLE); would make total sense...
> (so yeah it was more wishful thinking than reality)

I think it wouldn't be that difficult for the normal anonymous user allocations
(standard page fault path), but doing it for everything would be pretty
hard because you would need to add signal-bail-out paths everywhere. 

But doing it for some simple cases like page fault only would be a nice 
project for someone, shouldn't be too difficult.

-Andi

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2008-03-07 13:26 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-03 23:02 [patch 1/2] x86, fpu: split FPU state from task struct - v3 Suresh Siddha
2008-03-03 23:02 ` [patch 2/2] x86, fpu: lazy allocation of FPU area " Suresh Siddha
2008-03-04  1:20   ` Christoph Hellwig
2008-03-04  1:43     ` Suresh Siddha
2008-03-04 10:32       ` Ingo Molnar
2008-03-04 17:55         ` Suresh Siddha
2008-03-05 19:47           ` Pavel Machek
2008-03-06 15:51             ` Ingo Molnar
2008-03-06 19:10               ` Suresh Siddha
2008-03-06 20:24               ` Pavel Machek
2008-03-06 20:52                 ` Andi Kleen
2008-03-07 12:29                 ` H. Peter Anvin
2008-03-07 13:06                   ` Arjan van de Ven
2008-03-07 13:18                     ` Andi Kleen
2008-03-07 13:20                       ` Arjan van de Ven
2008-03-07 13:27                         ` Andi Kleen
2008-03-05 19:48   ` Pavel Machek
2008-03-06 19:26     ` Suresh Siddha
2008-03-06 21:21       ` Pavel Machek
2008-03-04  1:18 ` [patch 1/2] x86, fpu: split FPU state from task struct " Christoph Hellwig
2008-03-04  1:36   ` Suresh Siddha
2008-03-04  8:20     ` Ingo Molnar
2008-03-06 12:39     ` Christoph Hellwig
2008-03-04 10:28 ` Ingo Molnar
2008-03-04 17:59   ` Suresh Siddha
2008-03-04 20:53     ` Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).