* [PATCH v2 1/8] kernel/fork: Redo ifdefs around task's handling.
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
@ 2022-02-17 10:23 ` Sebastian Andrzej Siewior
2022-02-22 21:27 ` [tip: core/core] fork: Redo ifdefs around task stack handling tip-bot2 for Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 2/8] kernel/fork: Duplicate task_struct before stack allocation Sebastian Andrzej Siewior
` (8 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-02-17 10:23 UTC (permalink / raw)
To: linux-kernel, linux-ia64
Cc: Andy Lutomirski, Ben Segall, Daniel Bristot de Oliveira,
Dietmar Eggemann, Ingo Molnar, Juri Lelli, Peter Zijlstra,
Steven Rostedt, Thomas Gleixner, Vincent Guittot,
Sebastian Andrzej Siewior
The use of ifdef CONFIG_VMAP_STACK is confusing in terms what is
actually happenning and what can happen.
For instance from reading free_thread_stack() it appears that in the
CONFIG_VMAP_STACK case we may receive a non-NULL vm pointer but it may
also be NULL in which case __free_pages() is used to free the stack.
This is however not the case because in the VMAP case a non-NULL pointer
is always returned here.
Since it looks like this might happen, the compiler creates the correct
dead code with the invocation to __free_pages() and everything around
it. Twice.
Add spaces between the ifdef and the identifer to recognize the ifdef
level that we are currently in.
Add the current identifer as a comment behind #else and #endif.
Move the code within free_thread_stack() and alloc_thread_stack_node()
into the relavant ifdef block.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/fork.c | 74 +++++++++++++++++++++++++++------------------------
1 file changed, 39 insertions(+), 35 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index d75a528f7b219..f63c0af6002da 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -185,7 +185,7 @@ static inline void free_task_struct(struct task_struct *tsk)
*/
# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
-#ifdef CONFIG_VMAP_STACK
+# ifdef CONFIG_VMAP_STACK
/*
* vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
* flush. Try to minimize the number of calls by caching stacks.
@@ -210,11 +210,9 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-#endif
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
-#ifdef CONFIG_VMAP_STACK
void *stack;
int i;
@@ -258,7 +256,34 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
tsk->stack = stack;
}
return stack;
-#else
+}
+
+static void free_thread_stack(struct task_struct *tsk)
+{
+ struct vm_struct *vm = task_stack_vm_area(tsk);
+ int i;
+
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL,
+ tsk->stack_vm_area) != NULL)
+ continue;
+
+ tsk->stack = NULL;
+ tsk->stack_vm_area = NULL;
+ return;
+ }
+ vfree_atomic(tsk->stack);
+ tsk->stack = NULL;
+ tsk->stack_vm_area = NULL;
+}
+
+# else /* !CONFIG_VMAP_STACK */
+
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
@@ -267,36 +292,17 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
return tsk->stack;
}
return NULL;
-#endif
}
-static inline void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk)
{
-#ifdef CONFIG_VMAP_STACK
- struct vm_struct *vm = task_stack_vm_area(tsk);
-
- if (vm) {
- int i;
-
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
- memcg_kmem_uncharge_page(vm->pages[i], 0);
-
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i],
- NULL, tsk->stack_vm_area) != NULL)
- continue;
-
- return;
- }
-
- vfree_atomic(tsk->stack);
- return;
- }
-#endif
-
__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ tsk->stack = NULL;
}
-# else
+
+# endif /* CONFIG_VMAP_STACK */
+# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
+
static struct kmem_cache *thread_stack_cache;
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
@@ -312,6 +318,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
static void free_thread_stack(struct task_struct *tsk)
{
kmem_cache_free(thread_stack_cache, tsk->stack);
+ tsk->stack = NULL;
}
void thread_stack_cache_init(void)
@@ -321,8 +328,9 @@ void thread_stack_cache_init(void)
THREAD_SIZE, NULL);
BUG_ON(thread_stack_cache == NULL);
}
-# endif
-#endif
+
+# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;
@@ -432,10 +440,6 @@ static void release_task_stack(struct task_struct *tsk)
account_kernel_stack(tsk, -1);
free_thread_stack(tsk);
- tsk->stack = NULL;
-#ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = NULL;
-#endif
}
#ifdef CONFIG_THREAD_INFO_IN_TASK
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [tip: core/core] fork: Redo ifdefs around task stack handling
2022-02-17 10:23 ` [PATCH v2 1/8] kernel/fork: Redo ifdefs around task's handling Sebastian Andrzej Siewior
@ 2022-02-22 21:27 ` tip-bot2 for Sebastian Andrzej Siewior
0 siblings, 0 replies; 19+ messages in thread
From: tip-bot2 for Sebastian Andrzej Siewior @ 2022-02-22 21:27 UTC (permalink / raw)
To: linux-tip-commits
Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Andy Lutomirski, x86,
linux-kernel
The following commit has been merged into the core/core branch of tip:
Commit-ID: be9a2277cafd318976d59c41a7f45a934ec43b26
Gitweb: https://git.kernel.org/tip/be9a2277cafd318976d59c41a7f45a934ec43b26
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 17 Feb 2022 11:23:59 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 22 Feb 2022 22:25:01 +01:00
fork: Redo ifdefs around task stack handling
The use of ifdef CONFIG_VMAP_STACK is confusing in terms what is
actually happenning and what can happen.
For instance from reading free_thread_stack() it appears that in the
CONFIG_VMAP_STACK case it may receive a non-NULL vm pointer but it may also
be NULL in which case __free_pages() is used to free the stack. This is
however not the case because in the VMAP case a non-NULL pointer is always
returned here. Since it looks like this might happen, the compiler creates
the correct dead code with the invocation to __free_pages() and everything
around it. Twice.
Add spaces between the ifdef and the identifer to recognize the ifdef
level which is currently in scope.
Add the current identifer as a comment behind #else and #endif.
Move the code within free_thread_stack() and alloc_thread_stack_node()
into the relevant ifdef blocks.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-2-bigeasy@linutronix.de
---
kernel/fork.c | 74 ++++++++++++++++++++++++++------------------------
1 file changed, 39 insertions(+), 35 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index a024bf6..f5cc101 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -185,7 +185,7 @@ static inline void free_task_struct(struct task_struct *tsk)
*/
# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
-#ifdef CONFIG_VMAP_STACK
+# ifdef CONFIG_VMAP_STACK
/*
* vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
* flush. Try to minimize the number of calls by caching stacks.
@@ -210,11 +210,9 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-#endif
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
-#ifdef CONFIG_VMAP_STACK
void *stack;
int i;
@@ -258,45 +256,53 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
tsk->stack = stack;
}
return stack;
-#else
- struct page *page = alloc_pages_node(node, THREADINFO_GFP,
- THREAD_SIZE_ORDER);
-
- if (likely(page)) {
- tsk->stack = kasan_reset_tag(page_address(page));
- return tsk->stack;
- }
- return NULL;
-#endif
}
-static inline void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk)
{
-#ifdef CONFIG_VMAP_STACK
struct vm_struct *vm = task_stack_vm_area(tsk);
+ int i;
- if (vm) {
- int i;
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
- memcg_kmem_uncharge_page(vm->pages[i], 0);
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL,
+ tsk->stack_vm_area) != NULL)
+ continue;
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i],
- NULL, tsk->stack_vm_area) != NULL)
- continue;
+ tsk->stack = NULL;
+ tsk->stack_vm_area = NULL;
+ return;
+ }
+ vfree_atomic(tsk->stack);
+ tsk->stack = NULL;
+ tsk->stack_vm_area = NULL;
+}
- return;
- }
+# else /* !CONFIG_VMAP_STACK */
- vfree_atomic(tsk->stack);
- return;
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+ struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
+
+ if (likely(page)) {
+ tsk->stack = kasan_reset_tag(page_address(page));
+ return tsk->stack;
}
-#endif
+ return NULL;
+}
+static void free_thread_stack(struct task_struct *tsk)
+{
__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ tsk->stack = NULL;
}
-# else
+
+# endif /* CONFIG_VMAP_STACK */
+# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
+
static struct kmem_cache *thread_stack_cache;
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
@@ -312,6 +318,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
static void free_thread_stack(struct task_struct *tsk)
{
kmem_cache_free(thread_stack_cache, tsk->stack);
+ tsk->stack = NULL;
}
void thread_stack_cache_init(void)
@@ -321,8 +328,9 @@ void thread_stack_cache_init(void)
THREAD_SIZE, NULL);
BUG_ON(thread_stack_cache == NULL);
}
-# endif
-#endif
+
+# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;
@@ -432,10 +440,6 @@ static void release_task_stack(struct task_struct *tsk)
account_kernel_stack(tsk, -1);
free_thread_stack(tsk);
- tsk->stack = NULL;
-#ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = NULL;
-#endif
}
#ifdef CONFIG_THREAD_INFO_IN_TASK
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 2/8] kernel/fork: Duplicate task_struct before stack allocation.
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
2022-02-17 10:23 ` [PATCH v2 1/8] kernel/fork: Redo ifdefs around task's handling Sebastian Andrzej Siewior
@ 2022-02-17 10:24 ` Sebastian Andrzej Siewior
2022-02-22 21:27 ` [tip: core/core] fork: " tip-bot2 for Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 3/8] kernel/fork, IA64: Provide a alloc_thread_stack_node() for IA64 Sebastian Andrzej Siewior
` (7 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-02-17 10:24 UTC (permalink / raw)
To: linux-kernel, linux-ia64
Cc: Andy Lutomirski, Ben Segall, Daniel Bristot de Oliveira,
Dietmar Eggemann, Ingo Molnar, Juri Lelli, Peter Zijlstra,
Steven Rostedt, Thomas Gleixner, Vincent Guittot,
Sebastian Andrzej Siewior
alloc_thread_stack_node() already populates the task_struct::stack
member except on IA64. The stack pointer is saved and populated again
because IA64 needs it and arch_dup_task_struct() overwrites it.
Allocate thread's stack after task_struct has been duplicated as a
preparation.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/fork.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index f63c0af6002da..c47dcba5d66d2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -888,6 +888,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (!tsk)
return NULL;
+ err = arch_dup_task_struct(tsk, orig);
+ if (err)
+ goto free_tsk;
+
stack = alloc_thread_stack_node(tsk, node);
if (!stack)
goto free_tsk;
@@ -897,8 +901,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
stack_vm_area = task_stack_vm_area(tsk);
- err = arch_dup_task_struct(tsk, orig);
-
/*
* arch_dup_task_struct() clobbers the stack-related fields. Make
* sure they're properly initialized before using any stack-related
@@ -912,9 +914,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
refcount_set(&tsk->stack_refcount, 1);
#endif
- if (err)
- goto free_stack;
-
err = scs_prepare(tsk, node);
if (err)
goto free_stack;
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [tip: core/core] fork: Duplicate task_struct before stack allocation
2022-02-17 10:24 ` [PATCH v2 2/8] kernel/fork: Duplicate task_struct before stack allocation Sebastian Andrzej Siewior
@ 2022-02-22 21:27 ` tip-bot2 for Sebastian Andrzej Siewior
0 siblings, 0 replies; 19+ messages in thread
From: tip-bot2 for Sebastian Andrzej Siewior @ 2022-02-22 21:27 UTC (permalink / raw)
To: linux-tip-commits
Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Andy Lutomirski, x86,
linux-kernel
The following commit has been merged into the core/core branch of tip:
Commit-ID: 546c42b2c5c161619736dd730d3df709181999d0
Gitweb: https://git.kernel.org/tip/546c42b2c5c161619736dd730d3df709181999d0
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 17 Feb 2022 11:24:00 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 22 Feb 2022 22:25:01 +01:00
fork: Duplicate task_struct before stack allocation
alloc_thread_stack_node() already populates the task_struct::stack
member except on IA64. The stack pointer is saved and populated again
because IA64 needs it and arch_dup_task_struct() overwrites it.
Allocate thread's stack after task_struct has been duplicated as a
preparation for further changes.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-3-bigeasy@linutronix.de
---
kernel/fork.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index f5cc101..30c01ce 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -888,6 +888,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (!tsk)
return NULL;
+ err = arch_dup_task_struct(tsk, orig);
+ if (err)
+ goto free_tsk;
+
stack = alloc_thread_stack_node(tsk, node);
if (!stack)
goto free_tsk;
@@ -897,8 +901,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
stack_vm_area = task_stack_vm_area(tsk);
- err = arch_dup_task_struct(tsk, orig);
-
/*
* arch_dup_task_struct() clobbers the stack-related fields. Make
* sure they're properly initialized before using any stack-related
@@ -912,9 +914,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
refcount_set(&tsk->stack_refcount, 1);
#endif
- if (err)
- goto free_stack;
-
err = scs_prepare(tsk, node);
if (err)
goto free_stack;
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 3/8] kernel/fork, IA64: Provide a alloc_thread_stack_node() for IA64.
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
2022-02-17 10:23 ` [PATCH v2 1/8] kernel/fork: Redo ifdefs around task's handling Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 2/8] kernel/fork: Duplicate task_struct before stack allocation Sebastian Andrzej Siewior
@ 2022-02-17 10:24 ` Sebastian Andrzej Siewior
2022-02-22 21:27 ` [tip: core/core] fork, IA64: Provide " tip-bot2 for Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 4/8] kernel/fork: Don't assign the stack pointer in dup_task_struct() Sebastian Andrzej Siewior
` (6 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-02-17 10:24 UTC (permalink / raw)
To: linux-kernel, linux-ia64
Cc: Andy Lutomirski, Ben Segall, Daniel Bristot de Oliveira,
Dietmar Eggemann, Ingo Molnar, Juri Lelli, Peter Zijlstra,
Steven Rostedt, Thomas Gleixner, Vincent Guittot,
Sebastian Andrzej Siewior
Provide a generic alloc_thread_stack_node() for IA64/
CONFIG_ARCH_THREAD_STACK_ALLOCATOR which returns stack pointer and sets
task_struct::stack so it behaves exactly like the other implementations.
Rename IA64's alloc_thread_stack_node() and add the generic version to
the fork code so it is in one place _and_ to drastically lower chances
of fat fingering the IA64 code.
Do the same for free_thread_stack().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/ia64/include/asm/thread_info.h | 6 +++---
kernel/fork.c | 17 +++++++++++++++++
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 51d20cb377062..1684716f08201 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -55,15 +55,15 @@ struct thread_info {
#ifndef ASM_OFFSETS_C
/* how to get the thread information struct from C */
#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_stack_node(tsk, node) \
+#define arch_alloc_thread_stack_node(tsk, node) \
((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
#define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
#else
#define current_thread_info() ((struct thread_info *) 0)
-#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
+#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
#define task_thread_info(tsk) ((struct thread_info *) 0)
#endif
-#define free_thread_stack(tsk) /* nothing */
+#define arch_free_thread_stack(tsk) /* nothing */
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
diff --git a/kernel/fork.c b/kernel/fork.c
index c47dcba5d66d2..a6697215fe663 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -330,6 +330,23 @@ void thread_stack_cache_init(void)
}
# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
+
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+ unsigned long *stack;
+
+ stack = arch_alloc_thread_stack_node(tsk, node);
+ tsk->stack = stack;
+ return stack;
+}
+
+static void free_thread_stack(struct task_struct *tsk)
+{
+ arch_free_thread_stack(tsk);
+ tsk->stack = NULL;
+}
+
#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
/* SLAB cache for signal_struct structures (tsk->signal) */
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [tip: core/core] fork, IA64: Provide alloc_thread_stack_node() for IA64
2022-02-17 10:24 ` [PATCH v2 3/8] kernel/fork, IA64: Provide a alloc_thread_stack_node() for IA64 Sebastian Andrzej Siewior
@ 2022-02-22 21:27 ` tip-bot2 for Sebastian Andrzej Siewior
0 siblings, 0 replies; 19+ messages in thread
From: tip-bot2 for Sebastian Andrzej Siewior @ 2022-02-22 21:27 UTC (permalink / raw)
To: linux-tip-commits
Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Andy Lutomirski, x86,
linux-kernel
The following commit has been merged into the core/core branch of tip:
Commit-ID: 2bb0529c0bc0698f3baf3e88ffd61a18eef252a7
Gitweb: https://git.kernel.org/tip/2bb0529c0bc0698f3baf3e88ffd61a18eef252a7
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 17 Feb 2022 11:24:01 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 22 Feb 2022 22:25:01 +01:00
fork, IA64: Provide alloc_thread_stack_node() for IA64
Provide a generic alloc_thread_stack_node() for IA64 and
CONFIG_ARCH_THREAD_STACK_ALLOCATOR which returns stack pointer and sets
task_struct::stack so it behaves exactly like the other implementations.
Rename IA64's alloc_thread_stack_node() and add the generic version to the
fork code so it is in one place _and_ to drastically lower the chances of
fat fingering the IA64 code. Do the same for free_thread_stack().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-4-bigeasy@linutronix.de
---
arch/ia64/include/asm/thread_info.h | 6 +++---
kernel/fork.c | 17 +++++++++++++++++
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 51d20cb..1684716 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -55,15 +55,15 @@ struct thread_info {
#ifndef ASM_OFFSETS_C
/* how to get the thread information struct from C */
#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_stack_node(tsk, node) \
+#define arch_alloc_thread_stack_node(tsk, node) \
((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
#define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
#else
#define current_thread_info() ((struct thread_info *) 0)
-#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
+#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
#define task_thread_info(tsk) ((struct thread_info *) 0)
#endif
-#define free_thread_stack(tsk) /* nothing */
+#define arch_free_thread_stack(tsk) /* nothing */
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
diff --git a/kernel/fork.c b/kernel/fork.c
index 30c01ce..7b70c47 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -330,6 +330,23 @@ void thread_stack_cache_init(void)
}
# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
+
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+ unsigned long *stack;
+
+ stack = arch_alloc_thread_stack_node(tsk, node);
+ tsk->stack = stack;
+ return stack;
+}
+
+static void free_thread_stack(struct task_struct *tsk)
+{
+ arch_free_thread_stack(tsk);
+ tsk->stack = NULL;
+}
+
#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
/* SLAB cache for signal_struct structures (tsk->signal) */
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 4/8] kernel/fork: Don't assign the stack pointer in dup_task_struct().
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
` (2 preceding siblings ...)
2022-02-17 10:24 ` [PATCH v2 3/8] kernel/fork, IA64: Provide a alloc_thread_stack_node() for IA64 Sebastian Andrzej Siewior
@ 2022-02-17 10:24 ` Sebastian Andrzej Siewior
2022-02-22 21:27 ` [tip: core/core] fork: " tip-bot2 for Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 5/8] kernel/fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK Sebastian Andrzej Siewior
` (5 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-02-17 10:24 UTC (permalink / raw)
To: linux-kernel, linux-ia64
Cc: Andy Lutomirski, Ben Segall, Daniel Bristot de Oliveira,
Dietmar Eggemann, Ingo Molnar, Juri Lelli, Peter Zijlstra,
Steven Rostedt, Thomas Gleixner, Vincent Guittot,
Sebastian Andrzej Siewior
All four versions of alloc_thread_stack_node() assign now
task_struct::stack in case the allocation was successful.
Let alloc_thread_stack_node() return an error code instead of the stack
pointer and remove the stack assignment in dup_task_struct().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/fork.c | 47 ++++++++++++++++-------------------------------
1 file changed, 16 insertions(+), 31 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index a6697215fe663..546bea2e3b28a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,7 +211,7 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
void *stack;
int i;
@@ -232,7 +232,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
tsk->stack_vm_area = s;
tsk->stack = s->addr;
- return s->addr;
+ return 0;
}
/*
@@ -245,17 +245,16 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
THREADINFO_GFP & ~__GFP_ACCOUNT,
PAGE_KERNEL,
0, node, __builtin_return_address(0));
-
+ if (!stack)
+ return -ENOMEM;
/*
* We can't call find_vm_area() in interrupt context, and
* free_thread_stack() can be called in interrupt context,
* so cache the vm_struct.
*/
- if (stack) {
- tsk->stack_vm_area = find_vm_area(stack);
- tsk->stack = stack;
- }
- return stack;
+ tsk->stack_vm_area = find_vm_area(stack);
+ tsk->stack = stack;
+ return 0;
}
static void free_thread_stack(struct task_struct *tsk)
@@ -282,16 +281,16 @@ static void free_thread_stack(struct task_struct *tsk)
# else /* !CONFIG_VMAP_STACK */
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
if (likely(page)) {
tsk->stack = kasan_reset_tag(page_address(page));
- return tsk->stack;
+ return 0;
}
- return NULL;
+ return -ENOMEM;
}
static void free_thread_stack(struct task_struct *tsk)
@@ -305,14 +304,13 @@ static void free_thread_stack(struct task_struct *tsk)
static struct kmem_cache *thread_stack_cache;
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
- int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
stack = kasan_reset_tag(stack);
tsk->stack = stack;
- return stack;
+ return stack ? 0 : -ENOMEM;
}
static void free_thread_stack(struct task_struct *tsk)
@@ -332,13 +330,13 @@ void thread_stack_cache_init(void)
# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
stack = arch_alloc_thread_stack_node(tsk, node);
tsk->stack = stack;
- return stack;
+ return stack ? 0 : -ENOMEM;
}
static void free_thread_stack(struct task_struct *tsk)
@@ -895,8 +893,6 @@ void set_task_stack_end_magic(struct task_struct *tsk)
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
struct task_struct *tsk;
- unsigned long *stack;
- struct vm_struct *stack_vm_area __maybe_unused;
int err;
if (node == NUMA_NO_NODE)
@@ -909,24 +905,13 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_tsk;
- stack = alloc_thread_stack_node(tsk, node);
- if (!stack)
+ err = alloc_thread_stack_node(tsk, node);
+ if (err)
goto free_tsk;
if (memcg_charge_kernel_stack(tsk))
goto free_stack;
- stack_vm_area = task_stack_vm_area(tsk);
-
- /*
- * arch_dup_task_struct() clobbers the stack-related fields. Make
- * sure they're properly initialized before using any stack-related
- * functions again.
- */
- tsk->stack = stack;
-#ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = stack_vm_area;
-#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [tip: core/core] fork: Don't assign the stack pointer in dup_task_struct()
2022-02-17 10:24 ` [PATCH v2 4/8] kernel/fork: Don't assign the stack pointer in dup_task_struct() Sebastian Andrzej Siewior
@ 2022-02-22 21:27 ` tip-bot2 for Sebastian Andrzej Siewior
0 siblings, 0 replies; 19+ messages in thread
From: tip-bot2 for Sebastian Andrzej Siewior @ 2022-02-22 21:27 UTC (permalink / raw)
To: linux-tip-commits
Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Andy Lutomirski, x86,
linux-kernel
The following commit has been merged into the core/core branch of tip:
Commit-ID: 7865aba3ade4cf30f0ac08e015550084a50d9afb
Gitweb: https://git.kernel.org/tip/7865aba3ade4cf30f0ac08e015550084a50d9afb
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 17 Feb 2022 11:24:02 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 22 Feb 2022 22:25:01 +01:00
fork: Don't assign the stack pointer in dup_task_struct()
All four versions of alloc_thread_stack_node() assign now
task_struct::stack in case the allocation was successful.
Let alloc_thread_stack_node() return an error code instead of the stack
pointer and remove the stack assignment in dup_task_struct().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-5-bigeasy@linutronix.de
---
kernel/fork.c | 47 ++++++++++++++++-------------------------------
1 file changed, 16 insertions(+), 31 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 7b70c47..875bd43 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,7 +211,7 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
void *stack;
int i;
@@ -232,7 +232,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
tsk->stack_vm_area = s;
tsk->stack = s->addr;
- return s->addr;
+ return 0;
}
/*
@@ -245,17 +245,16 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
THREADINFO_GFP & ~__GFP_ACCOUNT,
PAGE_KERNEL,
0, node, __builtin_return_address(0));
-
+ if (!stack)
+ return -ENOMEM;
/*
* We can't call find_vm_area() in interrupt context, and
* free_thread_stack() can be called in interrupt context,
* so cache the vm_struct.
*/
- if (stack) {
- tsk->stack_vm_area = find_vm_area(stack);
- tsk->stack = stack;
- }
- return stack;
+ tsk->stack_vm_area = find_vm_area(stack);
+ tsk->stack = stack;
+ return 0;
}
static void free_thread_stack(struct task_struct *tsk)
@@ -282,16 +281,16 @@ static void free_thread_stack(struct task_struct *tsk)
# else /* !CONFIG_VMAP_STACK */
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
if (likely(page)) {
tsk->stack = kasan_reset_tag(page_address(page));
- return tsk->stack;
+ return 0;
}
- return NULL;
+ return -ENOMEM;
}
static void free_thread_stack(struct task_struct *tsk)
@@ -305,14 +304,13 @@ static void free_thread_stack(struct task_struct *tsk)
static struct kmem_cache *thread_stack_cache;
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
- int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
stack = kasan_reset_tag(stack);
tsk->stack = stack;
- return stack;
+ return stack ? 0 : -ENOMEM;
}
static void free_thread_stack(struct task_struct *tsk)
@@ -332,13 +330,13 @@ void thread_stack_cache_init(void)
# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
stack = arch_alloc_thread_stack_node(tsk, node);
tsk->stack = stack;
- return stack;
+ return stack ? 0 : -ENOMEM;
}
static void free_thread_stack(struct task_struct *tsk)
@@ -895,8 +893,6 @@ void set_task_stack_end_magic(struct task_struct *tsk)
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
struct task_struct *tsk;
- unsigned long *stack;
- struct vm_struct *stack_vm_area __maybe_unused;
int err;
if (node == NUMA_NO_NODE)
@@ -909,24 +905,13 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_tsk;
- stack = alloc_thread_stack_node(tsk, node);
- if (!stack)
+ err = alloc_thread_stack_node(tsk, node);
+ if (err)
goto free_tsk;
if (memcg_charge_kernel_stack(tsk))
goto free_stack;
- stack_vm_area = task_stack_vm_area(tsk);
-
- /*
- * arch_dup_task_struct() clobbers the stack-related fields. Make
- * sure they're properly initialized before using any stack-related
- * functions again.
- */
- tsk->stack = stack;
-#ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = stack_vm_area;
-#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 5/8] kernel/fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK.
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
` (3 preceding siblings ...)
2022-02-17 10:24 ` [PATCH v2 4/8] kernel/fork: Don't assign the stack pointer in dup_task_struct() Sebastian Andrzej Siewior
@ 2022-02-17 10:24 ` Sebastian Andrzej Siewior
2022-02-22 21:26 ` [tip: core/core] fork: " tip-bot2 for Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 6/8] kernel/fork: Move task stack account to do_exit() Sebastian Andrzej Siewior
` (4 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-02-17 10:24 UTC (permalink / raw)
To: linux-kernel, linux-ia64
Cc: Andy Lutomirski, Ben Segall, Daniel Bristot de Oliveira,
Dietmar Eggemann, Ingo Molnar, Juri Lelli, Peter Zijlstra,
Steven Rostedt, Thomas Gleixner, Vincent Guittot,
Sebastian Andrzej Siewior
memcg_charge_kernel_stack() is only used in the CONFIG_VMAP_STACK case.
Move memcg_charge_kernel_stack() into the CONFIG_VMAP_STACK block and
invoke it from within alloc_thread_stack_node().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/fork.c | 69 +++++++++++++++++++++++++++------------------------
1 file changed, 36 insertions(+), 33 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 546bea2e3b28a..919bdcf21b8e5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,6 +211,32 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
+static int memcg_charge_kernel_stack(struct task_struct *tsk)
+{
+ struct vm_struct *vm = task_stack_vm_area(tsk);
+ int i;
+ int ret;
+
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+ ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
+ if (ret)
+ goto err;
+ }
+ return 0;
+err:
+ /*
+ * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
+ * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
+ * ignore this page.
+ */
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
+ return ret;
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
void *stack;
@@ -230,6 +256,11 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
+ if (memcg_charge_kernel_stack(tsk)) {
+ vfree(s->addr);
+ return -ENOMEM;
+ }
+
tsk->stack_vm_area = s;
tsk->stack = s->addr;
return 0;
@@ -247,6 +278,11 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
0, node, __builtin_return_address(0));
if (!stack)
return -ENOMEM;
+
+ if (memcg_charge_kernel_stack(tsk)) {
+ vfree(stack);
+ return -ENOMEM;
+ }
/*
* We can't call find_vm_area() in interrupt context, and
* free_thread_stack() can be called in interrupt context,
@@ -418,36 +454,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
}
}
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
-{
-#ifdef CONFIG_VMAP_STACK
- struct vm_struct *vm = task_stack_vm_area(tsk);
- int ret;
-
- BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
-
- if (vm) {
- int i;
-
- BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
- /*
- * If memcg_kmem_charge_page() fails, page's
- * memory cgroup pointer is NULL, and
- * memcg_kmem_uncharge_page() in free_thread_stack()
- * will ignore this page.
- */
- ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
- 0);
- if (ret)
- return ret;
- }
- }
-#endif
- return 0;
-}
-
static void release_task_stack(struct task_struct *tsk)
{
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
@@ -909,9 +915,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_tsk;
- if (memcg_charge_kernel_stack(tsk))
- goto free_stack;
-
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [tip: core/core] fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK
2022-02-17 10:24 ` [PATCH v2 5/8] kernel/fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK Sebastian Andrzej Siewior
@ 2022-02-22 21:26 ` tip-bot2 for Sebastian Andrzej Siewior
0 siblings, 0 replies; 19+ messages in thread
From: tip-bot2 for Sebastian Andrzej Siewior @ 2022-02-22 21:26 UTC (permalink / raw)
To: linux-tip-commits
Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Andy Lutomirski, x86,
linux-kernel
The following commit has been merged into the core/core branch of tip:
Commit-ID: f1c1a9ee00e4c53c9ccc03ec1aff4792948a25eb
Gitweb: https://git.kernel.org/tip/f1c1a9ee00e4c53c9ccc03ec1aff4792948a25eb
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 17 Feb 2022 11:24:03 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 22 Feb 2022 22:25:01 +01:00
fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK
memcg_charge_kernel_stack() is only used in the CONFIG_VMAP_STACK case.
Move memcg_charge_kernel_stack() into the CONFIG_VMAP_STACK block and
invoke it from within alloc_thread_stack_node().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-6-bigeasy@linutronix.de
---
kernel/fork.c | 69 ++++++++++++++++++++++++++------------------------
1 file changed, 36 insertions(+), 33 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 875bd43..ac63e7f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,6 +211,32 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
+static int memcg_charge_kernel_stack(struct task_struct *tsk)
+{
+ struct vm_struct *vm = task_stack_vm_area(tsk);
+ int i;
+ int ret;
+
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+ ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
+ if (ret)
+ goto err;
+ }
+ return 0;
+err:
+ /*
+ * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
+ * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
+ * ignore this page.
+ */
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
+ return ret;
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
void *stack;
@@ -230,6 +256,11 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
+ if (memcg_charge_kernel_stack(tsk)) {
+ vfree(s->addr);
+ return -ENOMEM;
+ }
+
tsk->stack_vm_area = s;
tsk->stack = s->addr;
return 0;
@@ -247,6 +278,11 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
0, node, __builtin_return_address(0));
if (!stack)
return -ENOMEM;
+
+ if (memcg_charge_kernel_stack(tsk)) {
+ vfree(stack);
+ return -ENOMEM;
+ }
/*
* We can't call find_vm_area() in interrupt context, and
* free_thread_stack() can be called in interrupt context,
@@ -418,36 +454,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
}
}
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
-{
-#ifdef CONFIG_VMAP_STACK
- struct vm_struct *vm = task_stack_vm_area(tsk);
- int ret;
-
- BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
-
- if (vm) {
- int i;
-
- BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
- /*
- * If memcg_kmem_charge_page() fails, page's
- * memory cgroup pointer is NULL, and
- * memcg_kmem_uncharge_page() in free_thread_stack()
- * will ignore this page.
- */
- ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
- 0);
- if (ret)
- return ret;
- }
- }
-#endif
- return 0;
-}
-
static void release_task_stack(struct task_struct *tsk)
{
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
@@ -909,9 +915,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_tsk;
- if (memcg_charge_kernel_stack(tsk))
- goto free_stack;
-
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 6/8] kernel/fork: Move task stack account to do_exit().
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
` (4 preceding siblings ...)
2022-02-17 10:24 ` [PATCH v2 5/8] kernel/fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK Sebastian Andrzej Siewior
@ 2022-02-17 10:24 ` Sebastian Andrzej Siewior
2022-02-22 21:26 ` [tip: core/core] fork: Move task stack accounting " tip-bot2 for Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch() Sebastian Andrzej Siewior
` (3 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-02-17 10:24 UTC (permalink / raw)
To: linux-kernel, linux-ia64
Cc: Andy Lutomirski, Ben Segall, Daniel Bristot de Oliveira,
Dietmar Eggemann, Ingo Molnar, Juri Lelli, Peter Zijlstra,
Steven Rostedt, Thomas Gleixner, Vincent Guittot,
Sebastian Andrzej Siewior
There is no need to perform the stack accounting of the outgoing task in
its final schedule() invocation which happens with disabled preemption.
The task is leaving, the resources will be freed and the accounting can
happen in do_exit() before the actual schedule invocation which
frees the stack memory.
Move the accounting of the stack memory from release_task_stack() to
exit_task_stack_account() which then can be invoked from do_exit().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/sched/task_stack.h | 2 ++
kernel/exit.c | 1 +
kernel/fork.c | 35 +++++++++++++++++++++-----------
3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index d10150587d819..892562ebbd3aa 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
static inline void put_task_stack(struct task_struct *tsk) {}
#endif
+void exit_task_stack_account(struct task_struct *tsk);
+
#define task_stack_end_corrupted(task) \
(*(end_of_stack(task)) != STACK_END_MAGIC)
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab93..c303cffe7fdb4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -845,6 +845,7 @@ void __noreturn do_exit(long code)
put_page(tsk->task_frag.page);
validate_creds_for_do_exit(tsk);
+ exit_task_stack_account(tsk);
check_stack_usage();
preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index 919bdcf21b8e5..984f69d6f211f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,9 +211,8 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
{
- struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
int ret;
@@ -239,6 +238,7 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
+ struct vm_struct *vm;
void *stack;
int i;
@@ -256,7 +256,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
- if (memcg_charge_kernel_stack(tsk)) {
+ if (memcg_charge_kernel_stack(s)) {
vfree(s->addr);
return -ENOMEM;
}
@@ -279,7 +279,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
if (!stack)
return -ENOMEM;
- if (memcg_charge_kernel_stack(tsk)) {
+ vm = find_vm_area(stack);
+ if (memcg_charge_kernel_stack(vm)) {
vfree(stack);
return -ENOMEM;
}
@@ -288,19 +289,15 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
* free_thread_stack() can be called in interrupt context,
* so cache the vm_struct.
*/
- tsk->stack_vm_area = find_vm_area(stack);
+ tsk->stack_vm_area = vm;
tsk->stack = stack;
return 0;
}
static void free_thread_stack(struct task_struct *tsk)
{
- struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
- memcg_kmem_uncharge_page(vm->pages[i], 0);
-
for (i = 0; i < NR_CACHED_STACKS; i++) {
if (this_cpu_cmpxchg(cached_stacks[i], NULL,
tsk->stack_vm_area) != NULL)
@@ -454,12 +451,25 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
}
}
+void exit_task_stack_account(struct task_struct *tsk)
+{
+ account_kernel_stack(tsk, -1);
+
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm;
+ int i;
+
+ vm = task_stack_vm_area(tsk);
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
+ }
+}
+
static void release_task_stack(struct task_struct *tsk)
{
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
return; /* Better to leak the stack than to free prematurely */
- account_kernel_stack(tsk, -1);
free_thread_stack(tsk);
}
@@ -918,6 +928,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
+ account_kernel_stack(tsk, 1);
err = scs_prepare(tsk, node);
if (err)
@@ -961,8 +972,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->wake_q.next = NULL;
tsk->worker_private = NULL;
- account_kernel_stack(tsk, 1);
-
kcov_task_init(tsk);
kmap_local_fork(tsk);
@@ -981,6 +990,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
return tsk;
free_stack:
+ exit_task_stack_account(tsk);
free_thread_stack(tsk);
free_tsk:
free_task_struct(tsk);
@@ -2449,6 +2459,7 @@ static __latent_entropy struct task_struct *copy_process(
exit_creds(p);
bad_fork_free:
WRITE_ONCE(p->__state, TASK_DEAD);
+ exit_task_stack_account(p);
put_task_stack(p);
delayed_free_task(p);
fork_out:
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [tip: core/core] fork: Move task stack accounting to do_exit()
2022-02-17 10:24 ` [PATCH v2 6/8] kernel/fork: Move task stack account to do_exit() Sebastian Andrzej Siewior
@ 2022-02-22 21:26 ` tip-bot2 for Sebastian Andrzej Siewior
0 siblings, 0 replies; 19+ messages in thread
From: tip-bot2 for Sebastian Andrzej Siewior @ 2022-02-22 21:26 UTC (permalink / raw)
To: linux-tip-commits
Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Andy Lutomirski, x86,
linux-kernel
The following commit has been merged into the core/core branch of tip:
Commit-ID: 1a03d3f13ffe5dd24142d6db629e72c11b704d99
Gitweb: https://git.kernel.org/tip/1a03d3f13ffe5dd24142d6db629e72c11b704d99
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 17 Feb 2022 11:24:04 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 22 Feb 2022 22:25:02 +01:00
fork: Move task stack accounting to do_exit()
There is no need to perform the stack accounting of the outgoing task in
its final schedule() invocation which happens with preemption disabled.
The task is leaving, the resources will be freed and the accounting can
happen in do_exit() before the actual schedule invocation which
frees the stack memory.
Move the accounting of the stack memory from release_task_stack() to
exit_task_stack_account() which then can be invoked from do_exit().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-7-bigeasy@linutronix.de
---
include/linux/sched/task_stack.h | 2 ++-
kernel/exit.c | 1 +-
kernel/fork.c | 35 ++++++++++++++++++++-----------
3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index d101505..892562e 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
static inline void put_task_stack(struct task_struct *tsk) {}
#endif
+void exit_task_stack_account(struct task_struct *tsk);
+
#define task_stack_end_corrupted(task) \
(*(end_of_stack(task)) != STACK_END_MAGIC)
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25b..c303cff 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -845,6 +845,7 @@ void __noreturn do_exit(long code)
put_page(tsk->task_frag.page);
validate_creds_for_do_exit(tsk);
+ exit_task_stack_account(tsk);
check_stack_usage();
preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index ac63e7f..2582812 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,9 +211,8 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
{
- struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
int ret;
@@ -239,6 +238,7 @@ err:
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
+ struct vm_struct *vm;
void *stack;
int i;
@@ -256,7 +256,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
- if (memcg_charge_kernel_stack(tsk)) {
+ if (memcg_charge_kernel_stack(s)) {
vfree(s->addr);
return -ENOMEM;
}
@@ -279,7 +279,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
if (!stack)
return -ENOMEM;
- if (memcg_charge_kernel_stack(tsk)) {
+ vm = find_vm_area(stack);
+ if (memcg_charge_kernel_stack(vm)) {
vfree(stack);
return -ENOMEM;
}
@@ -288,19 +289,15 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
* free_thread_stack() can be called in interrupt context,
* so cache the vm_struct.
*/
- tsk->stack_vm_area = find_vm_area(stack);
+ tsk->stack_vm_area = vm;
tsk->stack = stack;
return 0;
}
static void free_thread_stack(struct task_struct *tsk)
{
- struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
- memcg_kmem_uncharge_page(vm->pages[i], 0);
-
for (i = 0; i < NR_CACHED_STACKS; i++) {
if (this_cpu_cmpxchg(cached_stacks[i], NULL,
tsk->stack_vm_area) != NULL)
@@ -454,12 +451,25 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
}
}
+void exit_task_stack_account(struct task_struct *tsk)
+{
+ account_kernel_stack(tsk, -1);
+
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm;
+ int i;
+
+ vm = task_stack_vm_area(tsk);
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
+ }
+}
+
static void release_task_stack(struct task_struct *tsk)
{
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
return; /* Better to leak the stack than to free prematurely */
- account_kernel_stack(tsk, -1);
free_thread_stack(tsk);
}
@@ -918,6 +928,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
+ account_kernel_stack(tsk, 1);
err = scs_prepare(tsk, node);
if (err)
@@ -961,8 +972,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->wake_q.next = NULL;
tsk->worker_private = NULL;
- account_kernel_stack(tsk, 1);
-
kcov_task_init(tsk);
kmap_local_fork(tsk);
@@ -981,6 +990,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
return tsk;
free_stack:
+ exit_task_stack_account(tsk);
free_thread_stack(tsk);
free_tsk:
free_task_struct(tsk);
@@ -2459,6 +2469,7 @@ bad_fork_cleanup_count:
exit_creds(p);
bad_fork_free:
WRITE_ONCE(p->__state, TASK_DEAD);
+ exit_task_stack_account(p);
put_task_stack(p);
delayed_free_task(p);
fork_out:
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch().
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
` (5 preceding siblings ...)
2022-02-17 10:24 ` [PATCH v2 6/8] kernel/fork: Move task stack account to do_exit() Sebastian Andrzej Siewior
@ 2022-02-17 10:24 ` Sebastian Andrzej Siewior
2022-02-22 21:26 ` [tip: core/core] fork: " tip-bot2 for Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 8/8] kernel/fork: Use IS_ENABLED() in account_kernel_stack() Sebastian Andrzej Siewior
` (2 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-02-17 10:24 UTC (permalink / raw)
To: linux-kernel, linux-ia64
Cc: Andy Lutomirski, Ben Segall, Daniel Bristot de Oliveira,
Dietmar Eggemann, Ingo Molnar, Juri Lelli, Peter Zijlstra,
Steven Rostedt, Thomas Gleixner, Vincent Guittot,
Sebastian Andrzej Siewior
The task stack could be deallocated later. For fork()/exec() kind of
workloads (say a shell script executing several commands) it is
important that the stack is released in finish_task_switch() so that in
VMAP_STACK case it can be cached and reused in the new task.
For PREEMPT_RT it would be good if the wake-up in vfree_atomic() could
be avoided in the scheduling path. Far worse are the other
free_thread_stack() implementations which invoke __free_pages()/
kmem_cache_free() with disabled preemption.
Cache the stack in free_thread_stack() in the VMAP_STACK case and
RCU-delay the free path otherwise. Free the stack in the RCU callback.
In the VMAP_STACK case this is another opportunity to fill the cache.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/fork.c | 76 ++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 63 insertions(+), 13 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 984f69d6f211f..aa17ed2a2afc7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,6 +193,41 @@ static inline void free_task_struct(struct task_struct *tsk)
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+struct vm_stack {
+ struct rcu_head rcu;
+ struct vm_struct *stack_vm_area;
+};
+
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+{
+ unsigned int i;
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
+ continue;
+ return true;
+ }
+ return false;
+}
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+
+ if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
+ return;
+
+ vfree(vm_stack);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct vm_stack *vm_stack = tsk->stack;
+
+ vm_stack->stack_vm_area = tsk->stack_vm_area;
+ call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
+}
+
static int free_vm_stack_cache(unsigned int cpu)
{
struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
@@ -296,24 +331,27 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- int i;
+ if (!try_release_thread_stack_to_cache(tsk->stack_vm_area))
+ thread_stack_delayed_free(tsk);
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i], NULL,
- tsk->stack_vm_area) != NULL)
- continue;
-
- tsk->stack = NULL;
- tsk->stack_vm_area = NULL;
- return;
- }
- vfree_atomic(tsk->stack);
tsk->stack = NULL;
tsk->stack_vm_area = NULL;
}
# else /* !CONFIG_VMAP_STACK */
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
@@ -328,7 +366,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ thread_stack_delayed_free(tsk);
tsk->stack = NULL;
}
@@ -337,6 +375,18 @@ static void free_thread_stack(struct task_struct *tsk)
static struct kmem_cache *thread_stack_cache;
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ kmem_cache_free(thread_stack_cache, rh);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
@@ -348,7 +398,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- kmem_cache_free(thread_stack_cache, tsk->stack);
+ thread_stack_delayed_free(tsk);
tsk->stack = NULL;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [tip: core/core] fork: Only cache the VMAP stack in finish_task_switch()
2022-02-17 10:24 ` [PATCH v2 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch() Sebastian Andrzej Siewior
@ 2022-02-22 21:26 ` tip-bot2 for Sebastian Andrzej Siewior
0 siblings, 0 replies; 19+ messages in thread
From: tip-bot2 for Sebastian Andrzej Siewior @ 2022-02-22 21:26 UTC (permalink / raw)
To: linux-tip-commits
Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Andy Lutomirski, x86,
linux-kernel
The following commit has been merged into the core/core branch of tip:
Commit-ID: e540bf3162e822d7a1f07e69e3bb1b4f925ca368
Gitweb: https://git.kernel.org/tip/e540bf3162e822d7a1f07e69e3bb1b4f925ca368
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 17 Feb 2022 11:24:05 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 22 Feb 2022 22:25:02 +01:00
fork: Only cache the VMAP stack in finish_task_switch()
The task stack could be deallocated later, but for fork()/exec() kind of
workloads (say a shell script executing several commands) it is important
that the stack is released in finish_task_switch() so that in VMAP_STACK
case it can be cached and reused in the new task.
For PREEMPT_RT it would be good if the wake-up in vfree_atomic() could
be avoided in the scheduling path. Far worse are the other
free_thread_stack() implementations which invoke __free_pages()/
kmem_cache_free() with disabled preemption.
Cache the stack in free_thread_stack() in the VMAP_STACK case and
RCU-delay the free path otherwise. Free the stack in the RCU callback.
In the VMAP_STACK case this is another opportunity to fill the cache.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-8-bigeasy@linutronix.de
---
kernel/fork.c | 76 +++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 63 insertions(+), 13 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 2582812..177bc64 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,6 +193,41 @@ static inline void free_task_struct(struct task_struct *tsk)
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+struct vm_stack {
+ struct rcu_head rcu;
+ struct vm_struct *stack_vm_area;
+};
+
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+{
+ unsigned int i;
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
+ continue;
+ return true;
+ }
+ return false;
+}
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+
+ if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
+ return;
+
+ vfree(vm_stack);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct vm_stack *vm_stack = tsk->stack;
+
+ vm_stack->stack_vm_area = tsk->stack_vm_area;
+ call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
+}
+
static int free_vm_stack_cache(unsigned int cpu)
{
struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
@@ -296,24 +331,27 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- int i;
+ if (!try_release_thread_stack_to_cache(tsk->stack_vm_area))
+ thread_stack_delayed_free(tsk);
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i], NULL,
- tsk->stack_vm_area) != NULL)
- continue;
-
- tsk->stack = NULL;
- tsk->stack_vm_area = NULL;
- return;
- }
- vfree_atomic(tsk->stack);
tsk->stack = NULL;
tsk->stack_vm_area = NULL;
}
# else /* !CONFIG_VMAP_STACK */
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
@@ -328,7 +366,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ thread_stack_delayed_free(tsk);
tsk->stack = NULL;
}
@@ -337,6 +375,18 @@ static void free_thread_stack(struct task_struct *tsk)
static struct kmem_cache *thread_stack_cache;
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ kmem_cache_free(thread_stack_cache, rh);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
@@ -348,7 +398,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- kmem_cache_free(thread_stack_cache, tsk->stack);
+ thread_stack_delayed_free(tsk);
tsk->stack = NULL;
}
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 8/8] kernel/fork: Use IS_ENABLED() in account_kernel_stack().
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
` (6 preceding siblings ...)
2022-02-17 10:24 ` [PATCH v2 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch() Sebastian Andrzej Siewior
@ 2022-02-17 10:24 ` Sebastian Andrzej Siewior
2022-02-22 21:26 ` [tip: core/core] fork: " tip-bot2 for Sebastian Andrzej Siewior
2022-02-21 18:36 ` [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Andy Lutomirski
2022-02-21 18:44 ` Andy Lutomirski
9 siblings, 1 reply; 19+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-02-17 10:24 UTC (permalink / raw)
To: linux-kernel, linux-ia64
Cc: Andy Lutomirski, Ben Segall, Daniel Bristot de Oliveira,
Dietmar Eggemann, Ingo Molnar, Juri Lelli, Peter Zijlstra,
Steven Rostedt, Thomas Gleixner, Vincent Guittot,
Sebastian Andrzej Siewior
Not strickly needed but checking CONFIG_VMAP_STACK instead of
task_stack_vm_area()' result allows the compiler the remove the else
path in the CONFIG_VMAP_STACK case where the pointer can't be NULL.
Check for CONFIG_VMAP_STACK in order to use the proper path.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/fork.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index aa17ed2a2afc7..4c97e0b5fb62a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -485,16 +485,16 @@ void vm_area_free(struct vm_area_struct *vma)
static void account_kernel_stack(struct task_struct *tsk, int account)
{
- void *stack = task_stack_page(tsk);
- struct vm_struct *vm = task_stack_vm_area(tsk);
-
- if (vm) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
account * (PAGE_SIZE / 1024));
} else {
+ void *stack = task_stack_page(tsk);
+
/* All stack pages are in the same node. */
mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
account * (THREAD_SIZE / 1024));
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [tip: core/core] fork: Use IS_ENABLED() in account_kernel_stack()
2022-02-17 10:24 ` [PATCH v2 8/8] kernel/fork: Use IS_ENABLED() in account_kernel_stack() Sebastian Andrzej Siewior
@ 2022-02-22 21:26 ` tip-bot2 for Sebastian Andrzej Siewior
0 siblings, 0 replies; 19+ messages in thread
From: tip-bot2 for Sebastian Andrzej Siewior @ 2022-02-22 21:26 UTC (permalink / raw)
To: linux-tip-commits
Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Andy Lutomirski, x86,
linux-kernel
The following commit has been merged into the core/core branch of tip:
Commit-ID: 0ce055f85335e48bc571114d61a70ae217039362
Gitweb: https://git.kernel.org/tip/0ce055f85335e48bc571114d61a70ae217039362
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 17 Feb 2022 11:24:06 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 22 Feb 2022 22:25:02 +01:00
fork: Use IS_ENABLED() in account_kernel_stack()
Not strickly needed but checking CONFIG_VMAP_STACK instead of
task_stack_vm_area()' result allows the compiler the remove the else
path in the CONFIG_VMAP_STACK case where the pointer can't be NULL.
Check for CONFIG_VMAP_STACK in order to use the proper path.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-9-bigeasy@linutronix.de
---
kernel/fork.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 177bc64..1279b57 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -485,16 +485,16 @@ void vm_area_free(struct vm_area_struct *vma)
static void account_kernel_stack(struct task_struct *tsk, int account)
{
- void *stack = task_stack_page(tsk);
- struct vm_struct *vm = task_stack_vm_area(tsk);
-
- if (vm) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
account * (PAGE_SIZE / 1024));
} else {
+ void *stack = task_stack_page(tsk);
+
/* All stack pages are in the same node. */
mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
account * (THREAD_SIZE / 1024));
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path.
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
` (7 preceding siblings ...)
2022-02-17 10:24 ` [PATCH v2 8/8] kernel/fork: Use IS_ENABLED() in account_kernel_stack() Sebastian Andrzej Siewior
@ 2022-02-21 18:36 ` Andy Lutomirski
2022-02-21 18:44 ` Andy Lutomirski
9 siblings, 0 replies; 19+ messages in thread
From: Andy Lutomirski @ 2022-02-21 18:36 UTC (permalink / raw)
To: Sebastian Andrzej Siewior, linux-kernel, linux-ia64
Cc: Ben Segall, Daniel Bristot de Oliveira, Dietmar Eggemann,
Ingo Molnar, Juri Lelli, Peter Zijlstra, Steven Rostedt,
Thomas Gleixner, Vincent Guittot
On 2/17/22 02:23, Sebastian Andrzej Siewior wrote:
> This is a follup-up on the patch
> sched: Delay task stack freeing on RT
> https://lkml.kernel.org/r/20210928122411.593486363@linutronix.de
>
> It addresses the review feedback:
> - Decouple stack accounting from its free invocation. The accounting
> happens in do_exit(), the final free call happens later.
>
> - Add put_task_stack_sched() to finish_task_switch(). Here the VMAP
> stack is cached only. If it fails, or in the !VMAP case then the final
> free happens in delayed_put_task_struct(). This is also an oportunity
> to cache the stack.
My first two tries to apply this series to something failed. What's it
based on?
The rest of my review will be based on diffs, not real code, since I
failed to apply it.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path.
2022-02-17 10:23 [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
` (8 preceding siblings ...)
2022-02-21 18:36 ` [PATCH v2 0/8] kernel/fork: Move thread stack free otu of the scheduler path Andy Lutomirski
@ 2022-02-21 18:44 ` Andy Lutomirski
9 siblings, 0 replies; 19+ messages in thread
From: Andy Lutomirski @ 2022-02-21 18:44 UTC (permalink / raw)
To: Sebastian Andrzej Siewior, linux-kernel, linux-ia64
Cc: Ben Segall, Daniel Bristot de Oliveira, Dietmar Eggemann,
Ingo Molnar, Juri Lelli, Peter Zijlstra, Steven Rostedt,
Thomas Gleixner, Vincent Guittot
On 2/17/22 02:23, Sebastian Andrzej Siewior wrote:
> This is a follup-up on the patch
> sched: Delay task stack freeing on RT
> https://lkml.kernel.org/r/20210928122411.593486363@linutronix.de
>
> It addresses the review feedback:
> - Decouple stack accounting from its free invocation. The accounting
> happens in do_exit(), the final free call happens later.
>
> - Add put_task_stack_sched() to finish_task_switch(). Here the VMAP
> stack is cached only. If it fails, or in the !VMAP case then the final
> free happens in delayed_put_task_struct(). This is also an oportunity
> to cache the stack.
>
> Changes since v1:
> - Drop the bit marked und use addtional RCU head to free the stack in
> case that it can't be cached. Suggested by Andy Lutomirski.
Acked-by: Andy Lutomirski <luto@kernel.org>
This version is much nicer. Thanks!
--Andy
^ permalink raw reply [flat|nested] 19+ messages in thread