* [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
@ 2015-07-16 19:14 Dave Hansen
  2015-07-16 19:25 ` Andy Lutomirski
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Dave Hansen @ 2015-07-16 19:14 UTC (permalink / raw)
  To: dave; +Cc: tglx, mingo, hpa, x86, peterz, bp, luto, torvalds, linux-kernel


The FPU rewrite removed the dynamic allocations of 'struct fpu'.
But, this potentially wastes massive amounts of memory (2k per
task on systems that do not have AVX-512 for instance).

Instead of having a separate slab, this patch just appends the
space that we need to the 'task_struct' which we dynamically
allocate already.  This saves us from doing an extra slab allocation
at fork().  The only real downside here is that we have to stick
everything at the end of the task_struct.  But, I think the
BUILD_BUG_ON()s I stuck in there should keep that from being too
fragile.

This survives a quick build and boot in a VM.  Does anyone see any
real downsides to this?

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org

---

 b/arch/x86/include/asm/fpu/types.h |   72 +++++++++++++++++++------------------
 b/arch/x86/include/asm/processor.h |   10 +++--
 b/arch/x86/kernel/fpu/init.c       |   39 ++++++++++++++++++++
 b/arch/x86/kernel/process.c        |    2 -
 b/fs/proc/kcore.c                  |    4 +-
 b/include/linux/sched.h            |   12 +++++-
 b/kernel/fork.c                    |    8 +++-
 7 files changed, 104 insertions(+), 43 deletions(-)

diff -puN arch/x86/include/asm/processor.h~dynamically-allocate-struct-fpu arch/x86/include/asm/processor.h
--- a/arch/x86/include/asm/processor.h~dynamically-allocate-struct-fpu	2015-07-16 10:50:42.340570967 -0700
+++ b/arch/x86/include/asm/processor.h	2015-07-16 11:37:19.182539322 -0700
@@ -390,9 +390,6 @@ struct thread_struct {
 #endif
 	unsigned long		gs;
 
-	/* Floating point and extended processor state */
-	struct fpu		fpu;
-
 	/* Save middle states of ptrace breakpoints */
 	struct perf_event	*ptrace_bps[HBP_NUM];
 	/* Debug status used for traps, single steps, etc... */
@@ -418,6 +415,13 @@ struct thread_struct {
 	unsigned long		iopl;
 	/* Max allowed port in the bitmap, in bytes: */
 	unsigned		io_bitmap_max;
+
+	/* Floating point and extended processor state */
+	struct fpu		fpu;
+	/*
+	 * WARNING: 'fpu' is dynamically-sized.  It *MUST* be at
+	 * the end.
+	 */
 };
 
 /*
diff -puN include/linux/sched.h~dynamically-allocate-struct-fpu include/linux/sched.h
--- a/include/linux/sched.h~dynamically-allocate-struct-fpu	2015-07-16 10:50:42.342571058 -0700
+++ b/include/linux/sched.h	2015-07-16 11:44:10.775174149 -0700
@@ -1522,8 +1522,6 @@ struct task_struct {
 /* hung task detection */
 	unsigned long last_switch_count;
 #endif
-/* CPU-specific state of this task */
-	struct thread_struct thread;
 /* filesystem information */
 	struct fs_struct *fs;
 /* open file information */
@@ -1778,8 +1776,18 @@ struct task_struct {
 	unsigned long	task_state_change;
 #endif
 	int pagefault_disabled;
+/* CPU-specific state of this task */
+	struct thread_struct thread;
+/*
+ * WARNING: on x86, 'thread_struct' contains a variable-sized
+ * structure.  It *MUST* be at the end of 'task_struct'.
+ *
+ * Do not put anything below here!
+ */
 };
 
+extern int arch_task_struct_size(void);
+
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
diff -puN arch/x86/kernel/fpu/init.c~dynamically-allocate-struct-fpu arch/x86/kernel/fpu/init.c
--- a/arch/x86/kernel/fpu/init.c~dynamically-allocate-struct-fpu	2015-07-16 10:50:42.355571648 -0700
+++ b/arch/x86/kernel/fpu/init.c	2015-07-16 12:02:15.284280976 -0700
@@ -136,6 +136,45 @@ static void __init fpu__init_system_gene
 unsigned int xstate_size;
 EXPORT_SYMBOL_GPL(xstate_size);
 
+#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER)	\
+	BUILD_BUG_ON((sizeof(TYPE) -			\
+			offsetof(TYPE, MEMBER) -	\
+			sizeof(((TYPE *)0)->MEMBER)) > 	\
+			0)				\
+
+/*
+ * We append the 'struct fpu' to the task_struct.
+ */
+int __weak arch_task_struct_size(void)
+{
+	int task_size = sizeof(struct task_struct);
+
+	/*
+	 * Subtract off the static size of the register state.
+	 * It potentially has a bunch of padding.
+	 */
+	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
+
+	/*
+	 * Add back the dynamically-calculated register state
+	 * size.
+	 */
+	task_size += xstate_size;
+
+	/*
+	 * We dynamically size 'struct fpu', so we require that
+	 * it be at the end of 'thread_struct' and that
+	 * 'thread_struct' be at the end of 'task_struct'.  If
+	 * you hit a compile error here, check the structure to
+	 * see if something got added to the end.
+	 */
+	CHECK_MEMBER_AT_END_OF(struct fpu, state);
+	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
+	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
+
+	return task_size;
+}
+
 /*
  * Set up the xstate_size based on the legacy FPU context size.
  *
diff -puN kernel/fork.c~dynamically-allocate-struct-fpu kernel/fork.c
--- a/kernel/fork.c~dynamically-allocate-struct-fpu	2015-07-16 10:50:42.357571739 -0700
+++ b/kernel/fork.c	2015-07-16 11:25:53.873498188 -0700
@@ -287,15 +287,21 @@ static void set_max_threads(unsigned int
 	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
 }
 
+int __weak arch_task_struct_size(void)
+{
+	return sizeof(struct task_struct);
+}
+
 void __init fork_init(void)
 {
+	int task_struct_size = arch_task_struct_size();
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
 #endif
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
-		kmem_cache_create("task_struct", sizeof(struct task_struct),
+		kmem_cache_create("task_struct", task_struct_size,
 			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
 #endif
 
diff -puN arch/x86/include/asm/fpu/types.h~dynamically-allocate-struct-fpu arch/x86/include/asm/fpu/types.h
--- a/arch/x86/include/asm/fpu/types.h~dynamically-allocate-struct-fpu	2015-07-16 10:50:42.358571784 -0700
+++ b/arch/x86/include/asm/fpu/types.h	2015-07-16 11:34:14.471174584 -0700
@@ -189,6 +189,7 @@ union fpregs_state {
 	struct fxregs_state		fxsave;
 	struct swregs_state		soft;
 	struct xregs_state		xsave;
+	u8 __padding[PAGE_SIZE];
 };
 
 /*
@@ -198,40 +199,6 @@ union fpregs_state {
  */
 struct fpu {
 	/*
-	 * @state:
-	 *
-	 * In-memory copy of all FPU registers that we save/restore
-	 * over context switches. If the task is using the FPU then
-	 * the registers in the FPU are more recent than this state
-	 * copy. If the task context-switches away then they get
-	 * saved here and represent the FPU state.
-	 *
-	 * After context switches there may be a (short) time period
-	 * during which the in-FPU hardware registers are unchanged
-	 * and still perfectly match this state, if the tasks
-	 * scheduled afterwards are not using the FPU.
-	 *
-	 * This is the 'lazy restore' window of optimization, which
-	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
-	 *
-	 * We detect whether a subsequent task uses the FPU via setting
-	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
-	 *
-	 * During this window, if the task gets scheduled again, we
-	 * might be able to skip having to do a restore from this
-	 * memory buffer to the hardware registers - at the cost of
-	 * incurring the overhead of #NM fault traps.
-	 *
-	 * Note that on modern CPUs that support the XSAVEOPT (or other
-	 * optimized XSAVE instructions), we don't use #NM traps anymore,
-	 * as the hardware can track whether FPU registers need saving
-	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
-	 * logic, which unconditionally saves/restores all FPU state
-	 * across context switches. (if FPU state exists.)
-	 */
-	union fpregs_state		state;
-
-	/*
 	 * @last_cpu:
 	 *
 	 * Records the last CPU on which this context was loaded into
@@ -288,6 +255,43 @@ struct fpu {
 	 * deal with bursty apps that only use the FPU for a short time:
 	 */
 	unsigned char			counter;
+	/*
+	 * @state:
+	 *
+	 * In-memory copy of all FPU registers that we save/restore
+	 * over context switches. If the task is using the FPU then
+	 * the registers in the FPU are more recent than this state
+	 * copy. If the task context-switches away then they get
+	 * saved here and represent the FPU state.
+	 *
+	 * After context switches there may be a (short) time period
+	 * during which the in-FPU hardware registers are unchanged
+	 * and still perfectly match this state, if the tasks
+	 * scheduled afterwards are not using the FPU.
+	 *
+	 * This is the 'lazy restore' window of optimization, which
+	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+	 *
+	 * We detect whether a subsequent task uses the FPU via setting
+	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+	 *
+	 * During this window, if the task gets scheduled again, we
+	 * might be able to skip having to do a restore from this
+	 * memory buffer to the hardware registers - at the cost of
+	 * incurring the overhead of #NM fault traps.
+	 *
+	 * Note that on modern CPUs that support the XSAVEOPT (or other
+	 * optimized XSAVE instructions), we don't use #NM traps anymore,
+	 * as the hardware can track whether FPU registers need saving
+	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+	 * logic, which unconditionally saves/restores all FPU state
+	 * across context switches. (if FPU state exists.)
+	 */
+	union fpregs_state		state;
+	/*
+	 * WARNING: 'state' is dynamically-sized.  Do not put
+	 * anything after it here.
+	 */
 };
 
 #endif /* _ASM_X86_FPU_H */
diff -puN arch/x86/kernel/process.c~dynamically-allocate-struct-fpu arch/x86/kernel/process.c
--- a/arch/x86/kernel/process.c~dynamically-allocate-struct-fpu	2015-07-16 10:50:42.360571875 -0700
+++ b/arch/x86/kernel/process.c	2015-07-16 12:00:59.204808551 -0700
@@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregist
  */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-	*dst = *src;
+	memcpy(dst, src, arch_task_struct_size());
 
 	return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
 }
diff -puN fs/proc/kcore.c~dynamically-allocate-struct-fpu fs/proc/kcore.c
--- a/fs/proc/kcore.c~dynamically-allocate-struct-fpu	2015-07-16 11:40:13.787445254 -0700
+++ b/fs/proc/kcore.c	2015-07-16 11:42:04.397453020 -0700
@@ -92,7 +92,7 @@ static size_t get_kcore_size(int *nphdr,
 			     roundup(sizeof(CORE_STR), 4)) +
 			roundup(sizeof(struct elf_prstatus), 4) +
 			roundup(sizeof(struct elf_prpsinfo), 4) +
-			roundup(sizeof(struct task_struct), 4);
+			roundup(arch_task_struct_size(), 4);
 	*elf_buflen = PAGE_ALIGN(*elf_buflen);
 	return size + *elf_buflen;
 }
@@ -415,7 +415,7 @@ static void elf_kcore_store_hdr(char *bu
 	/* set up the task structure */
 	notes[2].name	= CORE_STR;
 	notes[2].type	= NT_TASKSTRUCT;
-	notes[2].datasz	= sizeof(struct task_struct);
+	notes[2].datasz	= arch_task_struct_size();
 	notes[2].data	= current;
 
 	nhdr->p_filesz	+= notesize(&notes[2]);
_


* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 19:14 [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' Dave Hansen
@ 2015-07-16 19:25 ` Andy Lutomirski
  2015-07-16 21:29   ` Dave Hansen
  2015-07-16 22:35 ` [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' Peter Zijlstra
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Andy Lutomirski @ 2015-07-16 19:25 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, X86 ML,
	Peter Zijlstra, Borislav Petkov, Linus Torvalds, linux-kernel

On Thu, Jul 16, 2015 at 12:14 PM, Dave Hansen <dave@sr71.net> wrote:
>
> The FPU rewrite removed the dynamic allocations of 'struct fpu'.
> But, this potentially wastes massive amounts of memory (2k per
> task on systems that do not have AVX-512 for instance).
>
> Instead of having a separate slab, this patch just appends the
> space that we need to the 'task_struct' which we dynamically
> allocate already.  This saves us from doing an extra slab allocation
> at fork().  The only real downside here is that we have to stick
> everything at the end of the task_struct.  But, I think the
> BUILD_BUG_ON()s I stuck in there should keep that from being too
> fragile.
>
> This survives a quick build and boot in a VM.  Does anyone see any
> real downsides to this?

Looks generally sensible.  Minor nitpicking below.

> +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER)   \
> +       BUILD_BUG_ON((sizeof(TYPE) -                    \
> +                       offsetof(TYPE, MEMBER) -        \
> +                       sizeof(((TYPE *)0)->MEMBER)) >  \
> +                       0)                              \
> +

You could save a bit of typing by using offsetofend here.  Something
along the lines of BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE,
MEMBER));
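
Roughly, assuming offsetofend() is already available from a generic header
in this tree (the #ifndef fallback below just spells out what it expands to):

#ifndef offsetofend
# define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
#endif

/* the end-of-struct check then becomes a one-liner: */
#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
	BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))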

>  #endif /* _ASM_X86_FPU_H */
> diff -puN arch/x86/kernel/process.c~dynamically-allocate-struct-fpu arch/x86/kernel/process.c
> --- a/arch/x86/kernel/process.c~dynamically-allocate-struct-fpu 2015-07-16 10:50:42.360571875 -0700
> +++ b/arch/x86/kernel/process.c 2015-07-16 12:00:59.204808551 -0700
> @@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregist
>   */
>  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>  {
> -       *dst = *src;
> +       memcpy(dst, src, arch_task_struct_size());

This is actually vaguely performance-critical, which makes me think
that using some kind of inline or other real way (config macro, ifdef,
etc.) to detect whether there's an arch override would be better than a
weak function.
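
E.g. something like this (rough sketch only; CONFIG_ARCH_HAS_DYNAMIC_TASK_STRUCT
is a made-up placeholder name, not an existing Kconfig symbol):

/* include/linux/sched.h */
#ifdef CONFIG_ARCH_HAS_DYNAMIC_TASK_STRUCT
extern int arch_task_struct_size;	/* set up by the arch before fork_init() */
#else
# define arch_task_struct_size	((int)sizeof(struct task_struct))
#endif

That keeps the size a compile-time constant on architectures that don't
resize task_struct, with no out-of-line call in the copy path.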

--Andy


* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 19:25 ` Andy Lutomirski
@ 2015-07-16 21:29   ` Dave Hansen
  2015-07-17  8:45     ` Ingo Molnar
  0 siblings, 1 reply; 12+ messages in thread
From: Dave Hansen @ 2015-07-16 21:29 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, X86 ML,
	Peter Zijlstra, Borislav Petkov, Linus Torvalds, linux-kernel

On 07/16/2015 12:25 PM, Andy Lutomirski wrote:
> On Thu, Jul 16, 2015 at 12:14 PM, Dave Hansen <dave@sr71.net> wrote:
>> +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER)   \
>> +       BUILD_BUG_ON((sizeof(TYPE) -                    \
>> +                       offsetof(TYPE, MEMBER) -        \
>> +                       sizeof(((TYPE *)0)->MEMBER)) >  \
>> +                       0)                              \
>> +
> 
> You could save a bit of typing by using offsetofend here.  Something
> along the lines of BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE,
> MEMBER));

Good point.

>>  #endif /* _ASM_X86_FPU_H */
>> diff -puN arch/x86/kernel/process.c~dynamically-allocate-struct-fpu arch/x86/kernel/process.c
>> --- a/arch/x86/kernel/process.c~dynamically-allocate-struct-fpu 2015-07-16 10:50:42.360571875 -0700
>> +++ b/arch/x86/kernel/process.c 2015-07-16 12:00:59.204808551 -0700
>> @@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregist
>>   */
>>  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>>  {
>> -       *dst = *src;
>> +       memcpy(dst, src, arch_task_struct_size());
> 
> This is actually vaguely performance-critical, which makes me think
> that using some kind of inline or other real way (config macro, ifdef,
> etc.) to detect whether there's an arch override would be better than a
> weak function.

Fair enough.  I'll send out another version in a bit if there are no
more comments.


* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 19:14 [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' Dave Hansen
  2015-07-16 19:25 ` Andy Lutomirski
@ 2015-07-16 22:35 ` Peter Zijlstra
  2015-07-17  8:39   ` Ingo Molnar
  2015-07-16 22:42 ` [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' H. Peter Anvin
  2015-07-17  8:23 ` Ingo Molnar
  3 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2015-07-16 22:35 UTC (permalink / raw)
  To: Dave Hansen; +Cc: tglx, mingo, hpa, x86, bp, luto, torvalds, linux-kernel

On Thu, Jul 16, 2015 at 12:14:37PM -0700, Dave Hansen wrote:
> +++ b/arch/x86/kernel/fpu/init.c	2015-07-16 12:02:15.284280976 -0700
> @@ -136,6 +136,45 @@ static void __init fpu__init_system_gene
>  unsigned int xstate_size;
>  EXPORT_SYMBOL_GPL(xstate_size);
>  
> +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER)	\
> +	BUILD_BUG_ON((sizeof(TYPE) -			\
> +			offsetof(TYPE, MEMBER) -	\
> +			sizeof(((TYPE *)0)->MEMBER)) > 	\
> +			0)				\
> +
> +/*
> + * We append the 'struct fpu' to the task_struct.
> + */
> +int __weak arch_task_struct_size(void)
> +{
> +	int task_size = sizeof(struct task_struct);
> +
> +	/*
> +	 * Subtract off the static size of the register state.
> +	 * It potentially has a bunch of padding.
> +	 */
> +	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
> +
> +	/*
> +	 * Add back the dynamically-calculated register state
> +	 * size.
> +	 */
> +	task_size += xstate_size;
> +
> +	/*
> +	 * We dynamically size 'struct fpu', so we require that
> +	 * it be at the end of 'thread_struct' and that
> +	 * 'thread_struct' be at the end of 'task_struct'.  If
> +	 * you hit a compile error here, check the structure to
> +	 * see if something got added to the end.
> +	 */
> +	CHECK_MEMBER_AT_END_OF(struct fpu, state);
> +	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
> +	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
> +
> +	return task_size;
> +}

Since you want these invariants true at all times, maybe put the
BUILD_BUG_ON() in generic code instead of x86 specific? That way people
poking at other archs are less likely to accidentally break your stuff.


* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 19:14 [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' Dave Hansen
  2015-07-16 19:25 ` Andy Lutomirski
  2015-07-16 22:35 ` [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' Peter Zijlstra
@ 2015-07-16 22:42 ` H. Peter Anvin
  2015-07-16 22:57   ` Andy Lutomirski
  2015-07-18  3:40   ` Ingo Molnar
  2015-07-17  8:23 ` Ingo Molnar
  3 siblings, 2 replies; 12+ messages in thread
From: H. Peter Anvin @ 2015-07-16 22:42 UTC (permalink / raw)
  To: Dave Hansen; +Cc: tglx, mingo, x86, peterz, bp, luto, torvalds, linux-kernel

On 07/16/2015 12:14 PM, Dave Hansen wrote:
> The FPU rewrite removed the dynamic allocations of 'struct fpu'.
> But, this potentially wastes massive amounts of memory (2k per
> task on systems that do not have AVX-512 for instance).
> 
> Instead of having a separate slab, this patch just appends the
> space that we need to the 'task_struct' which we dynamically
> allocate already.  This saves us from doing an extra slab allocation
> at fork().  The only real downside here is that we have to stick
> everything at the end of the task_struct.  But, I think the
> BUILD_BUG_ON()s I stuck in there should keep that from being too
> fragile.
> 
> This survives a quick build and boot in a VM.  Does anyone see any
> real downsides to this?
> 

No.  I have also long advocated for merging task_struct and thread_info
into a common structure and getting it off the stack; it would improve
security and avoid weird corner cases in the irqstack handling.

	-hpa




* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 22:42 ` [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' H. Peter Anvin
@ 2015-07-16 22:57   ` Andy Lutomirski
  2015-07-18  3:40   ` Ingo Molnar
  1 sibling, 0 replies; 12+ messages in thread
From: Andy Lutomirski @ 2015-07-16 22:57 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Dave Hansen, Thomas Gleixner, Ingo Molnar, X86 ML,
	Peter Zijlstra, Borislav Petkov, Linus Torvalds, linux-kernel

On Thu, Jul 16, 2015 at 3:42 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 07/16/2015 12:14 PM, Dave Hansen wrote:
>> The FPU rewrite removed the dynamic allocations of 'struct fpu'.
>> But, this potentially wastes massive amounts of memory (2k per
>> task on systems that do not have AVX-512 for instance).
>>
>> Instead of having a separate slab, this patch just appends the
>> space that we need to the 'task_struct' which we dynamically
>> allocate already.  This saves us from doing an extra slab allocation
>> at fork().  The only real downside here is that we have to stick
>> everything at the end of the task_struct.  But, I think the
>> BUILD_BUG_ON()s I stuck in there should keep that from being too
>> fragile.
>>
>> This survives a quick build and boot in a VM.  Does anyone see any
>> real downsides to this?
>>
>
> No.  I have also long advocated for merging task_struct and thread_info
> into a common structure and getting it off the stack; it would improve
> security and avoid weird corner cases in the irqstack handling.
>

In tip/x86/asm, entry_64.S only references thread_info in two places,
both in the syscall code.  I'm hoping that we can move that code into
C soon and that we can do the same thing to the 32-bit code.  If we do
that, then we could just move ti->flags into thread_struct.
Everything else should be easy, and then thread_info would be empty.

--Andy


* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 19:14 [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' Dave Hansen
                   ` (2 preceding siblings ...)
  2015-07-16 22:42 ` [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' H. Peter Anvin
@ 2015-07-17  8:23 ` Ingo Molnar
  3 siblings, 0 replies; 12+ messages in thread
From: Ingo Molnar @ 2015-07-17  8:23 UTC (permalink / raw)
  To: Dave Hansen
  Cc: tglx, mingo, hpa, x86, peterz, bp, luto, torvalds, linux-kernel


* Dave Hansen <dave@sr71.net> wrote:

> The FPU rewrite removed the dynamic allocations of 'struct fpu'.
> But, this potentially wastes massive amounts of memory (2k per
> task on systems that do not have AVX-512 for instance).
> 
> Instead of having a separate slab, this patch just appends the
> space that we need to the 'task_struct' which we dynamically
> allocate already.  This saves us from doing an extra slab allocation
> at fork().  The only real downside here is that we have to stick
> everything at the end of the task_struct.  But, I think the
> BUILD_BUG_ON()s I stuck in there should keep that from being too
> fragile.
> 
> This survives a quick build and boot in a VM.  Does anyone see any
> real downsides to this?

So considering the complexity of the other patch that makes the static allocation, 
I'd massively prefer this patch as it solves the real bug.

It should also work on future hardware a lot better.

This was the dynamic approach I suggested in our discussion of the big FPU code 
rework.

> --- a/arch/x86/kernel/fpu/init.c~dynamically-allocate-struct-fpu	2015-07-16 10:50:42.355571648 -0700
> +++ b/arch/x86/kernel/fpu/init.c	2015-07-16 12:02:15.284280976 -0700
> @@ -136,6 +136,45 @@ static void __init fpu__init_system_gene
>  unsigned int xstate_size;
>  EXPORT_SYMBOL_GPL(xstate_size);
>  
> +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER)	\
> +	BUILD_BUG_ON((sizeof(TYPE) -			\
> +			offsetof(TYPE, MEMBER) -	\
> +			sizeof(((TYPE *)0)->MEMBER)) > 	\
> +			0)				\
> +
> +/*
> + * We append the 'struct fpu' to the task_struct.
> + */
> +int __weak arch_task_struct_size(void)

This should not be __weak, otherwise we risk getting the generic version:

> --- a/kernel/fork.c~dynamically-allocate-struct-fpu	2015-07-16 10:50:42.357571739 -0700
> +++ b/kernel/fork.c	2015-07-16 11:25:53.873498188 -0700
> @@ -287,15 +287,21 @@ static void set_max_threads(unsigned int
>  	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
>  }
>  
> +int __weak arch_task_struct_size(void)
> +{
> +	return sizeof(struct task_struct);
> +}
> +

Your system probably worked due to link order preferring the x86 version but I'm 
not sure.
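
For reference, the usual weak-symbol rules (a minimal userspace-style
illustration, not kernel code):

/* generic fallback, like the fork.c copy */
int __attribute__((weak)) arch_task_struct_size(void)
{
	return 100;	/* stands in for sizeof(struct task_struct) */
}

/* arch copy: a strong (non-weak) definition always wins at link time */
int arch_task_struct_size(void)
{
	return 150;
}

With *two* weak definitions, as in the patch above, which copy the linker
picks depends on link order, hence the risk of silently getting the generic
version.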

Other than this bug it looks good to me in principle.

Lemme check it on various hardware.

Thanks,

	Ingo


* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 22:35 ` [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' Peter Zijlstra
@ 2015-07-17  8:39   ` Ingo Molnar
  2015-07-17  8:43     ` [PATCH] x86/fpu, bug.h: Move CHECK_MEMBER_AT_END_OF() to a generic header and use it in generic code Ingo Molnar
  0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2015-07-17  8:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Dave Hansen, tglx, mingo, hpa, x86, bp, luto, torvalds, linux-kernel


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Thu, Jul 16, 2015 at 12:14:37PM -0700, Dave Hansen wrote:
> > +++ b/arch/x86/kernel/fpu/init.c	2015-07-16 12:02:15.284280976 -0700
> > @@ -136,6 +136,45 @@ static void __init fpu__init_system_gene
> >  unsigned int xstate_size;
> >  EXPORT_SYMBOL_GPL(xstate_size);
> >  
> > +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER)	\
> > +	BUILD_BUG_ON((sizeof(TYPE) -			\
> > +			offsetof(TYPE, MEMBER) -	\
> > +			sizeof(((TYPE *)0)->MEMBER)) > 	\
> > +			0)				\
> > +
> > +/*
> > + * We append the 'struct fpu' to the task_struct.
> > + */
> > +int __weak arch_task_struct_size(void)
> > +{
> > +	int task_size = sizeof(struct task_struct);
> > +
> > +	/*
> > +	 * Subtract off the static size of the register state.
> > +	 * It potentially has a bunch of padding.
> > +	 */
> > +	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
> > +
> > +	/*
> > +	 * Add back the dynamically-calculated register state
> > +	 * size.
> > +	 */
> > +	task_size += xstate_size;
> > +
> > +	/*
> > +	 * We dynamically size 'struct fpu', so we require that
> > +	 * it be at the end of 'thread_struct' and that
> > +	 * 'thread_struct' be at the end of 'task_struct'.  If
> > +	 * you hit a compile error here, check the structure to
> > +	 * see if something got added to the end.
> > +	 */
> > +	CHECK_MEMBER_AT_END_OF(struct fpu, state);
> > +	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
> > +	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
> > +
> > +	return task_size;
> > +}
> 
> Since you want these invariants true at all times, maybe put the
> BUILD_BUG_ON() in generic code instead of x86 specific? That way people
> poking at other archs are less likely to accidentally break your stuff.

Yeah.

Thanks,

	Ingo


* [PATCH] x86/fpu, bug.h: Move CHECK_MEMBER_AT_END_OF() to a generic header and use it in generic code
  2015-07-17  8:39   ` Ingo Molnar
@ 2015-07-17  8:43     ` Ingo Molnar
  0 siblings, 0 replies; 12+ messages in thread
From: Ingo Molnar @ 2015-07-17  8:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Dave Hansen, tglx, mingo, hpa, x86, bp, luto, torvalds, linux-kernel


* Ingo Molnar <mingo@kernel.org> wrote:

> 
> * Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Thu, Jul 16, 2015 at 12:14:37PM -0700, Dave Hansen wrote:
> > > +++ b/arch/x86/kernel/fpu/init.c	2015-07-16 12:02:15.284280976 -0700
> > > @@ -136,6 +136,45 @@ static void __init fpu__init_system_gene
> > >  unsigned int xstate_size;
> > >  EXPORT_SYMBOL_GPL(xstate_size);
> > >  
> > > +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER)	\
> > > +	BUILD_BUG_ON((sizeof(TYPE) -			\
> > > +			offsetof(TYPE, MEMBER) -	\
> > > +			sizeof(((TYPE *)0)->MEMBER)) > 	\
> > > +			0)				\
> > > +
> > > +/*
> > > + * We append the 'struct fpu' to the task_struct.
> > > + */
> > > +int __weak arch_task_struct_size(void)
> > > +{
> > > +	int task_size = sizeof(struct task_struct);
> > > +
> > > +	/*
> > > +	 * Subtract off the static size of the register state.
> > > +	 * It potentially has a bunch of padding.
> > > +	 */
> > > +	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
> > > +
> > > +	/*
> > > +	 * Add back the dynamically-calculated register state
> > > +	 * size.
> > > +	 */
> > > +	task_size += xstate_size;
> > > +
> > > +	/*
> > > +	 * We dynamically size 'struct fpu', so we require that
> > > +	 * it be at the end of 'thread_struct' and that
> > > +	 * 'thread_struct' be at the end of 'task_struct'.  If
> > > +	 * you hit a compile error here, check the structure to
> > > +	 * see if something got added to the end.
> > > +	 */
> > > +	CHECK_MEMBER_AT_END_OF(struct fpu, state);
> > > +	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
> > > +	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
> > > +
> > > +	return task_size;
> > > +}
> > 
> > Since you want these invariants true at all times, maybe put the
> > BUILD_BUG_ON() in generic code instead of x86 specific? That way people
> > poking at other archs are less likely to accidentally break your stuff.
> 
> Yeah.

The patch below implements this. Only build tested.

Thanks,

	Ingo

===============>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/init.c |   17 ++---------------
 include/linux/bug.h        |    4 ++++
 kernel/fork.c              |   18 ++++++++++++++++++
 3 files changed, 24 insertions(+), 15 deletions(-)

Index: tip/arch/x86/kernel/fpu/init.c
===================================================================
--- tip.orig/arch/x86/kernel/fpu/init.c
+++ tip/arch/x86/kernel/fpu/init.c
@@ -136,16 +136,10 @@ static void __init fpu__init_system_gene
 unsigned int xstate_size;
 EXPORT_SYMBOL_GPL(xstate_size);
 
-#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER)	\
-	BUILD_BUG_ON((sizeof(TYPE) -			\
-			offsetof(TYPE, MEMBER) -	\
-			sizeof(((TYPE *)0)->MEMBER)) > 	\
-			0)				\
-
 /*
  * We append the 'struct fpu' to the task_struct.
  */
-int __weak arch_task_struct_size(void)
+int arch_task_struct_size(void)
 {
 	int task_size = sizeof(struct task_struct);
 
@@ -161,16 +155,9 @@ int __weak arch_task_struct_size(void)
 	 */
 	task_size += xstate_size;
 
-	/*
-	 * We dynamically size 'struct fpu', so we require that
-	 * it be at the end of 'thread_struct' and that
-	 * 'thread_struct' be at the end of 'task_struct'.  If
-	 * you hit a compile error here, check the structure to
-	 * see if something got added to the end.
-	 */
+	/* Build time FPU structure layout debug checks: */
 	CHECK_MEMBER_AT_END_OF(struct fpu, state);
 	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
-	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
 
 	return task_size;
 }
Index: tip/include/linux/bug.h
===================================================================
--- tip.orig/include/linux/bug.h
+++ tip/include/linux/bug.h
@@ -85,6 +85,10 @@ struct pt_regs;
 
 #endif	/* __CHECKER__ */
 
+/* Enforce that 'MEMBER' is the last field of 'TYPE': */
+#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
+	BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
+
 #ifdef CONFIG_GENERIC_BUG
 #include <asm-generic/bug.h>
 
Index: tip/kernel/fork.c
===================================================================
--- tip.orig/kernel/fork.c
+++ tip/kernel/fork.c
@@ -287,8 +287,26 @@ static void set_max_threads(unsigned int
 	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
 }
 
+#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
+	BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
+
+/*
+ * This function can be overridden by the architecture to support dynamic sizing
+ * of the task_struct:
+ */
 int __weak arch_task_struct_size(void)
 {
+	/*
+	 * Build-time checks for structure layout constraints:
+	 *
+	 * On some architectures we dynamically size 'struct thread_struct',
+	 * so we require that it be at the end of 'task_struct'.
+	 *
+	 * If you hit a compile error here, check the structure to
+	 * see if something got added to the end.
+	 */
+	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
+
 	return sizeof(struct task_struct);
 }
 



* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 21:29   ` Dave Hansen
@ 2015-07-17  8:45     ` Ingo Molnar
  2015-07-17  9:31       ` [PATCH] x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86 Ingo Molnar
  0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2015-07-17  8:45 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andy Lutomirski, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Peter Zijlstra, Borislav Petkov, Linus Torvalds,
	linux-kernel


* Dave Hansen <dave@sr71.net> wrote:

> >>  #endif /* _ASM_X86_FPU_H */
> >> diff -puN arch/x86/kernel/process.c~dynamically-allocate-struct-fpu arch/x86/kernel/process.c
> >> --- a/arch/x86/kernel/process.c~dynamically-allocate-struct-fpu 2015-07-16 10:50:42.360571875 -0700
> >> +++ b/arch/x86/kernel/process.c 2015-07-16 12:00:59.204808551 -0700
> >> @@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregist
> >>   */
> >>  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> >>  {
> >> -       *dst = *src;
> >> +       memcpy(dst, src, arch_task_struct_size());
> > 
> > This is actually vaguely performance-critical, which makes me think
> > that using some kind of inline or other real way (config macro, ifdef,
> > etc.) to detect whether there's an arch override would be better than a
> > weak function.
> 
> Fair enough.  I'll send out another version in a bit if there are no more 
> comments.

Beyond making it a build time switch for other architectures, I'd also suggest 
introducing a __read_mostly task_struct_size variable on x86, so that we can 
write:

	memcpy(dst, src, task_struct_size);

Thanks,

	Ingo


* [PATCH] x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86
  2015-07-17  8:45     ` Ingo Molnar
@ 2015-07-17  9:31       ` Ingo Molnar
  0 siblings, 0 replies; 12+ messages in thread
From: Ingo Molnar @ 2015-07-17  9:31 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andy Lutomirski, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Peter Zijlstra, Borislav Petkov, Linus Torvalds,
	linux-kernel


* Ingo Molnar <mingo@kernel.org> wrote:

> 
> * Dave Hansen <dave@sr71.net> wrote:
> 
> > >>  #endif /* _ASM_X86_FPU_H */
> > >> diff -puN arch/x86/kernel/process.c~dynamically-allocate-struct-fpu arch/x86/kernel/process.c
> > >> --- a/arch/x86/kernel/process.c~dynamically-allocate-struct-fpu 2015-07-16 10:50:42.360571875 -0700
> > >> +++ b/arch/x86/kernel/process.c 2015-07-16 12:00:59.204808551 -0700
> > >> @@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregist
> > >>   */
> > >>  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> > >>  {
> > >> -       *dst = *src;
> > >> +       memcpy(dst, src, arch_task_struct_size());
> > > 
> > > This is actually vaguely performance-critical, which makes me think
> > > that using some kind of inline or other real way (config macro, ifdef,
> > > etc.) to detect whether there's an arch override would be better than a
> > > weak function.
> > 
> > Fair enough.  I'll send out another version in a bit if there are no more 
> > comments.
> 
> Beyond making it a build time switch for other architectures, I'd also suggest 
> introducing a __read_mostly task_struct_size variable on x86, so that we can 
> write:
> 
> 	memcpy(dst, src, task_struct_size);

I.e. like the patch below, on top of your patch plus the bug.h patch I sent.

Only build tested.

Thanks,

	Ingo

======================>
From ab5da866792e4297de2308b42121633419f6d06d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 17 Jul 2015 11:27:36 +0200
Subject: [PATCH] x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86

Don't burden architectures without dynamic task_struct sizing with the overhead
of dynamic sizing.

Also optimize the x86 code a bit by caching task_struct_size.

Cc: Dave Hansen <dave@sr71.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/Kconfig               |  4 ++++
 arch/x86/Kconfig           |  1 +
 arch/x86/kernel/fpu/init.c |  9 ++++++---
 arch/x86/kernel/process.c  |  2 +-
 fs/proc/kcore.c            |  4 ++--
 include/linux/sched.h      |  4 +++-
 kernel/fork.c              | 21 ++++++++-------------
 7 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index bec6666a3cc4..dd6867885c73 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -221,6 +221,10 @@ config ARCH_TASK_STRUCT_ALLOCATOR
 config ARCH_THREAD_INFO_ALLOCATOR
 	bool
 
+# Select if arch wants to size task_struct dynamically
+config ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	bool
+
 config HAVE_REGS_AND_STACK_ACCESS_API
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b6b03b716302..1e1d0e8ac9b6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -41,6 +41,7 @@ config X86
 	select ARCH_USE_CMPXCHG_LOCKREF		if X86_64
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_IPC_PARSE_VERSION	if X86_32
 	select ARCH_WANT_OPTIONAL_GPIOLIB
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 36de0ebdb957..5401243430ee 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -4,6 +4,8 @@
 #include <asm/fpu/internal.h>
 #include <asm/tlbflush.h>
 
+#include <linux/sched.h>
+
 /*
  * Initialize the TS bit in CR0 according to the style of context-switches
  * we are using:
@@ -137,9 +139,9 @@ unsigned int xstate_size;
 EXPORT_SYMBOL_GPL(xstate_size);
 
 /*
- * We append the 'struct fpu' to the task_struct.
+ * We append the 'struct fpu' to the task_struct:
  */
-int arch_task_struct_size(void)
+static void __init fpu__init_task_struct_size(void)
 {
 	int task_size = sizeof(struct task_struct);
 
@@ -159,7 +161,7 @@ int arch_task_struct_size(void)
 	CHECK_MEMBER_AT_END_OF(struct fpu, state);
 	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
 
-	return task_size;
+	task_struct_size = task_size;
 }
 
 /*
@@ -313,6 +315,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
 	fpu__init_system_generic();
 	fpu__init_system_xstate_size_legacy();
 	fpu__init_system_xstate();
+	fpu__init_task_struct_size();
 
 	fpu__init_system_ctx_switch();
 }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 975420eac105..ae6a315cfa73 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
  */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-	memcpy(dst, src, arch_task_struct_size());
+	memcpy(dst, src, task_struct_size);
 
 	return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
 }
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index a0fe99485687..1cfa1127c9dd 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -92,7 +92,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 			     roundup(sizeof(CORE_STR), 4)) +
 			roundup(sizeof(struct elf_prstatus), 4) +
 			roundup(sizeof(struct elf_prpsinfo), 4) +
-			roundup(arch_task_struct_size(), 4);
+			roundup(task_struct_size, 4);
 	*elf_buflen = PAGE_ALIGN(*elf_buflen);
 	return size + *elf_buflen;
 }
@@ -415,7 +415,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	/* set up the task structure */
 	notes[2].name	= CORE_STR;
 	notes[2].type	= NT_TASKSTRUCT;
-	notes[2].datasz	= arch_task_struct_size();
+	notes[2].datasz	= task_struct_size;
 	notes[2].data	= current;
 
 	nhdr->p_filesz	+= notesize(&notes[2]);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e43a41d892b6..d0504bb9f12a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1786,7 +1786,9 @@ struct task_struct {
  */
 };
 
-extern int arch_task_struct_size(void);
+#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
+extern int task_struct_size __read_mostly;
+#endif
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
diff --git a/kernel/fork.c b/kernel/fork.c
index 9f84b1aa3dbf..cfbe24ddf3eb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -287,14 +287,15 @@ static void set_max_threads(unsigned int max_threads_suggested)
 	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
 }
 
-#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
-	BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
+#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
+/* Initialized by the architecture: */
+int task_struct_size __read_mostly;
+#else
+/* Static build time size: */
+static const int task_struct_size = sizeof(struct task_struct);
+#endif
 
-/*
- * This function can be overridden by the architecture to support dynamic sizing
- * of the task_struct:
- */
-int __weak arch_task_struct_size(void)
+void __init fork_init(void)
 {
 	/*
 	 * Build-time checks for structure layout constraints:
@@ -307,12 +308,6 @@ int __weak arch_task_struct_size(void)
 	 */
 	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
 
-	return sizeof(struct task_struct);
-}
-
-void __init fork_init(void)
-{
-	int task_struct_size = arch_task_struct_size();
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES



* Re: [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu'
  2015-07-16 22:42 ` [RFC][PATCH] x86, fpu: dynamically allocate 'struct fpu' H. Peter Anvin
  2015-07-16 22:57   ` Andy Lutomirski
@ 2015-07-18  3:40   ` Ingo Molnar
  1 sibling, 0 replies; 12+ messages in thread
From: Ingo Molnar @ 2015-07-18  3:40 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Dave Hansen, tglx, mingo, x86, peterz, bp, luto, torvalds, linux-kernel


* H. Peter Anvin <hpa@zytor.com> wrote:

> On 07/16/2015 12:14 PM, Dave Hansen wrote:
> > The FPU rewrite removed the dynamic allocations of 'struct fpu'.
> > But, this potentially wastes massive amounts of memory (2k per
> > task on systems that do not have AVX-512 for instance).
> > 
> > Instead of having a separate slab, this patch just appends the
> > space that we need to the 'task_struct' which we dynamically
> > allocate already.  This saves us from doing an extra slab allocation
> > at fork().  The only real downside here is that we have to stick
> > everything at the end of the task_struct.  But, I think the
> > BUILD_BUG_ON()s I stuck in there should keep that from being too
> > fragile.
> > 
> > This survives a quick build and boot in a VM.  Does anyone see any
> > real downsides to this?
> 
> No.  I have also long advocated for merging task_struct and thread_info into a 
> common structure and getting it off the stack; it would improve security and avoid
> weird corner cases in the irqstack handling.

Note that we have 3 related 'task state' data structures with overlapping purpose:

  task_struct
   thread_struct
  thread_info

where thread_struct is embedded in task_struct currently.

So to turn it all into a single structure we'd have to merge thread_info into 
thread_struct. thread_info was put on the kernel stack due to the ESP trick we 
played long ago - but that is moot these days.

So I think what we want is not some common structure, but to actually merge all of 
thread_info into thread_struct for arch details and into task_struct for generic 
fields, and only have:

  task_struct                /* generic fields */
   thread_struct             /* arch details */

this can be done gradually, field by field, and in the end thread_info can be 
eliminated altogether.
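
In (deliberately simplified) code, the direction would be something like the
sketch below; the field names are placeholders, not the real definitions:

/* today: thread_info is a separate structure at the bottom of the kernel stack */
struct thread_info {
	struct task_struct	*task;
	unsigned int		ti_flags;	/* e.g. the TIF_* work flags */
	unsigned int		cpu;
};

/* the goal: only two structures, with the arch part embedded last */
struct thread_struct {
	unsigned long		arch_state;	/* placeholder for the arch details */
};

struct task_struct {
	unsigned int		ti_flags;	/* generic fields absorb thread_info */
	struct thread_struct	thread;		/* arch details */
};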

The only real complication is that it affects every architecture. The good news is 
that most of the thread_info layout details are wrapped in various constructs like 
test_ti_thread_flag() and task_thread_info().

While at it we might as well rename 'thread_struct' to 'arch_task_struct':

  task_struct                  /* generic fields */
  arch_task_struct             /* arch details */

to make it really clear and easy to understand at a glance - as the current naming 
became ambiguous and slightly confusing the moment we introduced threading.

Thanks,

	Ingo


