* [PATCH] sched: remove redundant cpu_cgrp_subsys->fork()
@ 2022-06-22 9:48 Chengming Zhou
2022-06-28 12:34 ` Chengming Zhou
2022-06-29 14:05 ` Vincent Guittot
0 siblings, 2 replies; 4+ messages in thread
From: Chengming Zhou @ 2022-06-22 9:48 UTC
To: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
rostedt, bsegall, rdunlap, vschneid
Cc: linux-kernel, Chengming Zhou
We use cpu_cgrp_subsys->fork() to set the task group for the new fair
task in cgroup_post_fork().

Since commit b1e8206582f9 ("sched: Fix yet more sched_fork() races")
already sets the task group for the new fair task in
sched_cgroup_fork(), cpu_cgrp_subsys->fork() can be removed:
  cgroup_can_fork()                     --> pin parent's sched_task_group
  sched_cgroup_fork()
      __set_task_cpu()                  --> set task group
  cgroup_post_fork()
      ss->fork() := cpu_cgroup_fork()   --> set again
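
For reference, a simplified sketch of the fork-time path (locking,
autogroup handling, and error paths elided; see kernel/sched/core.c
for the real code) showing why the later ss->fork() assignment was
redundant:

  /* Sketch only: simplified from sched_cgroup_fork() after commit
   * b1e8206582f9, not the verbatim kernel code.
   */
  void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
  {
  	/* Install the task group pinned by cgroup_can_fork()... */
  	p->sched_task_group = container_of(kargs->cset->subsys[cpu_cgrp_id],
  					   struct task_group, css);
  	/* ...and let __set_task_cpu() -> set_task_rq() propagate it. */
  	__set_task_cpu(p, smp_processor_id());
  }

By the time cgroup_post_fork() runs, the group is already set, so
cpu_cgroup_fork() only repeated the same work.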
After this change, task_change_group_fair() only needs to care about
task cgroup migration, which makes the code much simpler.

This patch moves the task se depth setting to set_task_rq(), which is
called when the task moves across CPUs/groups and is a better place
than attach_entity_cfs_rq(). The latter is also used when changing a
task's sched_class, in which case the group/depth doesn't actually
change.
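
As a worked example of the invariant the new set_task_rq() line
maintains (a sketch for a hypothetical two-level hierarchy, not part
of the patch): assume task T runs in cgroup /A/B on CPU c.

  root cfs_rq                      (no owning se)
    `-- tg_A->se[c]      depth 0   (tg_A->se[c]->parent == NULL)
          `-- tg_B->se[c]    depth 1   (= tg_A->se[c]->depth + 1)
                `-- T: p->se       depth 2   (= tg_B->se[c]->depth + 1)

Any move of T across groups or CPUs goes through set_task_rq(), so the
depth stays consistent without the extra fixup in
attach_entity_cfs_rq().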
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
kernel/sched/core.c | 27 ++++-----------------------
kernel/sched/fair.c | 31 +------------------------------
kernel/sched/sched.h | 6 ++----
3 files changed, 7 insertions(+), 57 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c16f8cc5de08..f44bc5b889ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -481,8 +481,7 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
* p->se.load, p->rt_priority,
* p->dl.dl_{runtime, deadline, period, flags, bw, density}
* - sched_setnuma(): p->numa_preferred_nid
- * - sched_move_task()/
- * cpu_cgroup_fork(): p->sched_task_group
+ * - sched_move_task(): p->sched_task_group
* - uclamp_update_active() p->uclamp*
*
* p->state <- TASK_*:
@@ -10098,7 +10097,7 @@ void sched_release_group(struct task_group *tg)
spin_unlock_irqrestore(&task_group_lock, flags);
}
-static void sched_change_group(struct task_struct *tsk, int type)
+static void sched_change_group(struct task_struct *tsk)
{
struct task_group *tg;
@@ -10114,7 +10113,7 @@ static void sched_change_group(struct task_struct *tsk, int type)
#ifdef CONFIG_FAIR_GROUP_SCHED
if (tsk->sched_class->task_change_group)
- tsk->sched_class->task_change_group(tsk, type);
+ tsk->sched_class->task_change_group(tsk);
else
#endif
set_task_rq(tsk, task_cpu(tsk));
@@ -10145,7 +10144,7 @@ void sched_move_task(struct task_struct *tsk)
if (running)
put_prev_task(rq, tsk);
- sched_change_group(tsk, TASK_MOVE_GROUP);
+ sched_change_group(tsk);
if (queued)
enqueue_task(rq, tsk, queue_flags);
@@ -10223,23 +10222,6 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
sched_unregister_group(tg);
}
-/*
- * This is called before wake_up_new_task(), therefore we really only
- * have to set its group bits, all the other stuff does not apply.
- */
-static void cpu_cgroup_fork(struct task_struct *task)
-{
- struct rq_flags rf;
- struct rq *rq;
-
- rq = task_rq_lock(task, &rf);
-
- update_rq_clock(rq);
- sched_change_group(task, TASK_SET_GROUP);
-
- task_rq_unlock(rq, task, &rf);
-}
-
static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
@@ -11136,7 +11118,6 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.css_released = cpu_cgroup_css_released,
.css_free = cpu_cgroup_css_free,
.css_extra_stat_show = cpu_extra_stat_show,
- .fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
.legacy_cftypes = cpu_legacy_files,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8bed75757e65..fdb9125fe34e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11325,14 +11325,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
-#ifdef CONFIG_FAIR_GROUP_SCHED
- /*
- * Since the real-depth could have been changed (only FAIR
- * class maintain depth value), reset depth properly.
- */
- se->depth = se->parent ? se->parent->depth + 1 : 0;
-#endif
-
/* Synchronize entity with its cfs_rq */
update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
attach_entity_load_avg(cfs_rq, se);
@@ -11431,15 +11423,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
}
#ifdef CONFIG_FAIR_GROUP_SCHED
-static void task_set_group_fair(struct task_struct *p)
-{
- struct sched_entity *se = &p->se;
-
- set_task_rq(p, task_cpu(p));
- se->depth = se->parent ? se->parent->depth + 1 : 0;
-}
-
-static void task_move_group_fair(struct task_struct *p)
+static void task_change_group_fair(struct task_struct *p)
{
detach_task_cfs_rq(p);
set_task_rq(p, task_cpu(p));
@@ -11451,19 +11435,6 @@ static void task_move_group_fair(struct task_struct *p)
attach_task_cfs_rq(p);
}
-static void task_change_group_fair(struct task_struct *p, int type)
-{
- switch (type) {
- case TASK_SET_GROUP:
- task_set_group_fair(p);
- break;
-
- case TASK_MOVE_GROUP:
- task_move_group_fair(p);
- break;
- }
-}
-
void free_fair_sched_group(struct task_group *tg)
{
int i;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 317480d535b0..8433e22da3ab 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1901,6 +1901,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
p->se.cfs_rq = tg->cfs_rq[cpu];
p->se.parent = tg->se[cpu];
+ p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@@ -2163,11 +2164,8 @@ struct sched_class {
void (*update_curr)(struct rq *rq);
-#define TASK_SET_GROUP 0
-#define TASK_MOVE_GROUP 1
-
#ifdef CONFIG_FAIR_GROUP_SCHED
- void (*task_change_group)(struct task_struct *p, int type);
+ void (*task_change_group)(struct task_struct *p);
#endif
};
--
2.36.1
* Re: [PATCH] sched: remove redundant cpu_cgrp_subsys->fork()
2022-06-22 9:48 [PATCH] sched: remove redundant cpu_cgrp_subsys->fork() Chengming Zhou
@ 2022-06-28 12:34 ` Chengming Zhou
2022-06-29 14:05 ` Vincent Guittot
1 sibling, 0 replies; 4+ messages in thread
From: Chengming Zhou @ 2022-06-28 12:34 UTC
To: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
rostedt, bsegall, rdunlap, vschneid
Cc: linux-kernel
Hello, friendly ping...
Thanks.
On 2022/6/22 17:48, Chengming Zhou wrote:
> [...]
* Re: [PATCH] sched: remove redundant cpu_cgrp_subsys->fork()
2022-06-22 9:48 [PATCH] sched: remove redundant cpu_cgrp_subsys->fork() Chengming Zhou
2022-06-28 12:34 ` Chengming Zhou
@ 2022-06-29 14:05 ` Vincent Guittot
2022-06-30 1:52 ` [External] " Chengming Zhou
1 sibling, 1 reply; 4+ messages in thread
From: Vincent Guittot @ 2022-06-29 14:05 UTC
To: Chengming Zhou
Cc: mingo, peterz, juri.lelli, dietmar.eggemann, rostedt, bsegall,
rdunlap, vschneid, linux-kernel
On Wed, 22 Jun 2022 at 11:48, Chengming Zhou
<zhouchengming@bytedance.com> wrote:
>
> We use cpu_cgrp_subsys->fork() to set the task group for the new fair
> task in cgroup_post_fork().
>
> Since commit b1e8206582f9 ("sched: Fix yet more sched_fork() races")
> already sets the task group for the new fair task in
> sched_cgroup_fork(), cpu_cgrp_subsys->fork() can be removed:
>
>   cgroup_can_fork()                     --> pin parent's sched_task_group
>   sched_cgroup_fork()
>       __set_task_cpu()                  --> set task group
>   cgroup_post_fork()
>       ss->fork() := cpu_cgroup_fork()   --> set again
>
> After this change, task_change_group_fair() only needs to care about
> task cgroup migration, which makes the code much simpler.
>
> This patch moves the task se depth setting to set_task_rq(), which is
> called when the task moves across CPUs/groups and is a better place
> than attach_entity_cfs_rq(). The latter is also used when changing a
> task's sched_class, in which case the group/depth doesn't actually
> change.
>
The commit message needs some minor fixes but apart from this
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> [...]
* Re: [External] Re: [PATCH] sched: remove redundant cpu_cgrp_subsys->fork()
2022-06-29 14:05 ` Vincent Guittot
@ 2022-06-30 1:52 ` Chengming Zhou
0 siblings, 0 replies; 4+ messages in thread
From: Chengming Zhou @ 2022-06-30 1:52 UTC
To: Vincent Guittot
Cc: mingo, peterz, juri.lelli, dietmar.eggemann, rostedt, bsegall,
rdunlap, vschneid, linux-kernel
On 2022/6/29 22:05, Vincent Guittot wrote:
> On Wed, 22 Jun 2022 at 11:48, Chengming Zhou
> <zhouchengming@bytedance.com> wrote:
>>
>> [...]
>
> The commit message needs some minor fixes but apart from this
>
> Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Ok, I will try to improve and send v2. Thanks for your review.