From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: Andy Lutomirski <luto@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-ia64@vger.kernel.org,
Ben Segall <bsegall@google.com>,
Daniel Bristot de Oliveira <bristot@redhat.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Ingo Molnar <mingo@redhat.com>,
Juri Lelli <juri.lelli@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Steven Rostedt <rostedt@goodmis.org>,
Thomas Gleixner <tglx@linutronix.de>,
Vincent Guittot <vincent.guittot@linaro.org>
Subject: Re: [PATCH 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch().
Date: Mon, 14 Feb 2022 17:54:47 +0100 [thread overview]
Message-ID: <YgqJV0LREU9IDJxl@linutronix.de> (raw)
In-Reply-To: <YgpJ41q35k+KCsk4@linutronix.de>
On 2022-02-14 13:24:05 [+0100], To Andy Lutomirski wrote:
> task::stack_vm_area and ::stack. Now I remember why I went for that bit.
> But I do have (hopefully) a better idea now.
I still need to update the patch description, but the following should work:
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index 892562ebbd3aa..12b3f472b1358 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -70,6 +70,7 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
}
extern void put_task_stack(struct task_struct *tsk);
+extern void put_task_stack_sched(struct task_struct *tsk);
#else
static inline void *try_get_task_stack(struct task_struct *tsk)
{
@@ -77,6 +78,7 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
}
static inline void put_task_stack(struct task_struct *tsk) {}
+static inline void put_task_stack_sched(struct task_struct *tsk) {}
#endif
void exit_task_stack_account(struct task_struct *tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5f4e659a922e1..d7e118c86f9e6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,6 +193,44 @@ static inline void free_task_struct(struct task_struct *tsk)
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+struct vm_stack {
+ struct rcu_head rcu;
+ struct vm_struct *stack_vm_area;
+};
+
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+{
+ unsigned int i;
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
+ continue;
+ return true;
+ }
+ return false;
+}
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+
+ if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
+ return;
+
+ vfree(vm_stack);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct vm_stack *vm_stack = tsk->stack;
+
+ vm_stack->stack_vm_area = tsk->stack_vm_area;
+ call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
+
+ tsk->stack = NULL;
+ tsk->stack_vm_area = NULL;
+}
+
static int free_vm_stack_cache(unsigned int cpu)
{
struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
@@ -294,26 +332,39 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
return 0;
}
-static void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk, bool delayed_free)
{
- int i;
-
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i], NULL,
- tsk->stack_vm_area) != NULL)
- continue;
-
+ if (try_release_thread_stack_to_cache(tsk->stack_vm_area)) {
tsk->stack = NULL;
tsk->stack_vm_area = NULL;
return;
}
- vfree_atomic(tsk->stack);
+
+ if (delayed_free) {
+ thread_stack_delayed_free(tsk);
+ return;
+ }
+
+ vfree(tsk->stack);
tsk->stack = NULL;
tsk->stack_vm_area = NULL;
}
# else /* !CONFIG_VMAP_STACK */
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+ tsk->stack = NULL;
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
@@ -326,8 +377,12 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
return -ENOMEM;
}
-static void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk, bool delayed_free)
{
+ if (delayed_free) {
+ thread_stack_delayed_free(tsk);
+ return;
+ }
__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
tsk->stack = NULL;
}
@@ -337,6 +392,19 @@ static void free_thread_stack(struct task_struct *tsk)
static struct kmem_cache *thread_stack_cache;
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ kmem_cache_free(thread_stack_cache, rh);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+ tsk->stack = NULL;
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
@@ -346,8 +414,12 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
return stack ? 0 : -ENOMEM;
}
-static void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk, bool delayed_free)
{
+ if (delayed_free) {
+ thread_stack_delayed_free(tsk);
+ return;
+ }
kmem_cache_free(thread_stack_cache, tsk->stack);
tsk->stack = NULL;
}
@@ -372,7 +444,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
return stack ? 0 : -ENOMEM;
}
-static void free_thread_stack(struct task_struct *tsk, bool cache_only)
+static void free_thread_stack(struct task_struct *tsk, bool delayed_free)
{
arch_free_thread_stack(tsk);
}
@@ -464,19 +536,25 @@ void exit_task_stack_account(struct task_struct *tsk)
}
}
-static void release_task_stack(struct task_struct *tsk)
+static void release_task_stack(struct task_struct *tsk, bool delayed_free)
{
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
return; /* Better to leak the stack than to free prematurely */
- free_thread_stack(tsk);
+ free_thread_stack(tsk, delayed_free);
}
#ifdef CONFIG_THREAD_INFO_IN_TASK
void put_task_stack(struct task_struct *tsk)
{
if (refcount_dec_and_test(&tsk->stack_refcount))
- release_task_stack(tsk);
+ release_task_stack(tsk, false);
+}
+
+void put_task_stack_sched(struct task_struct *tsk)
+{
+ if (refcount_dec_and_test(&tsk->stack_refcount))
+ release_task_stack(tsk, true);
}
#endif
@@ -490,7 +568,7 @@ void free_task(struct task_struct *tsk)
* The task is finally done with both the stack and thread_info,
* so free both.
*/
- release_task_stack(tsk);
+ release_task_stack(tsk, false);
#else
/*
* If the task had a separate stack allocation, it should be gone
@@ -990,7 +1068,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
free_stack:
exit_task_stack_account(tsk);
- free_thread_stack(tsk);
+ free_thread_stack(tsk, false);
free_tsk:
free_task_struct(tsk);
return NULL;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fcf0c180617c2..defe31036930a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4895,8 +4895,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
- /* Task is done with its stack. */
- put_task_stack(prev);
+ /*
+ * Task is done with its stack. Try to cache VMAP stack and
+ * delay free it otherwise.
+ */
+ put_task_stack_sched(prev);
put_task_struct_rcu_user(prev);
}
--
2.34.1
> > > --Andy
>
Sebastian
next prev parent reply other threads:[~2022-02-14 16:54 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-25 15:26 [PATCH REPOST 0/8] kernel/fork: Move thread stack free out of the scheduler path Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 1/8] kernel/fork: Redo ifdefs around task's handling Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 2/8] kernel/fork: Duplicate task_struct before stack allocation Sebastian Andrzej Siewior
2022-02-11 23:42 ` Andy Lutomirski
2022-02-14 11:39 ` Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 3/8] kernel/fork, IA64: Provide a alloc_thread_stack_node() for IA64 Sebastian Andrzej Siewior
2022-02-14 18:00 ` Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 4/8] kernel/fork: Don't assign the stack pointer in dup_task_struct() Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 5/8] kernel/fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 6/8] kernel/fork: Move task stack account to do_exit() Sebastian Andrzej Siewior
2022-02-11 23:43 ` Andy Lutomirski
2022-01-25 15:26 ` [PATCH 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch() Sebastian Andrzej Siewior
2022-02-11 23:55 ` Andy Lutomirski
2022-02-14 12:10 ` Sebastian Andrzej Siewior
2022-02-14 12:24 ` Sebastian Andrzej Siewior
2022-02-14 16:54 ` Sebastian Andrzej Siewior [this message]
2022-02-14 17:48 ` Sebastian Andrzej Siewior
2022-02-14 18:15 ` [PATCH v2 " Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 8/8] kernel/fork: Use IS_ENABLED() in account_kernel_stack() Sebastian Andrzej Siewior
2022-02-08 17:10 ` [PATCH REPOST 0/8] kernel/fork: Move thread stack free out of the scheduler path Sebastian Andrzej Siewior
-- strict thread matches above, loose matches on Subject: below --
2021-11-18 14:34 [PATCH " Sebastian Andrzej Siewior
2021-11-18 14:34 ` [PATCH 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch() Sebastian Andrzej Siewior
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YgqJV0LREU9IDJxl@linutronix.de \
--to=bigeasy@linutronix.de \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=dietmar.eggemann@arm.com \
--cc=juri.lelli@redhat.com \
--cc=linux-ia64@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=vincent.guittot@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).