* [RFC][PATCH 0/4] sched: Optimize cgroup muck
@ 2012-06-14 13:29 Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 1/4] sched/fair: track cgroup depth Peter Zijlstra
` (3 more replies)
0 siblings, 4 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel
Hi, I finally got some time to make this stuff work (it boots!).
I haven't actually done any benchmarks etc.. but various people expressed
interest in the fold put_prev_task into pick_next_task thing.
Please give it a spin or so..
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC][PATCH 1/4] sched/fair: track cgroup depth
2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
@ 2012-06-14 13:29 ` Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
` (2 subsequent siblings)
3 siblings, 0 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: peter_zijlstra-sched-optimize_cgroup_pick_next_task_fair_1.patch --]
[-- Type: text/plain, Size: 3778 bytes --]
Track depth in cgroup tree, this is useful for things like
find_matching_se() where you need to get to a common parent of two
sched entities.
Keeping the depth avoids having to calculate it on the spot, which
saves a number of possible cache-misses.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 1 +
kernel/sched/fair.c | 42 +++++++++++++++++++-----------------------
2 files changed, 20 insertions(+), 23 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1186,6 +1186,7 @@ struct sched_entity {
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
+ int depth;
struct sched_entity *parent;
/* rq on which this entity is (to be) queued: */
struct cfs_rq *cfs_rq;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -295,13 +295,13 @@ static inline void list_del_leaf_cfs_rq(
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
/* Do the two (enqueued) entities belong to the same group ? */
-static inline int
+static inline struct cfs_rq *
is_same_group(struct sched_entity *se, struct sched_entity *pse)
{
if (se->cfs_rq == pse->cfs_rq)
- return 1;
+ return se->cfs_rq;
- return 0;
+ return NULL;
}
static inline struct sched_entity *parent_entity(struct sched_entity *se)
@@ -309,17 +309,6 @@ static inline struct sched_entity *paren
return se->parent;
}
-/* return depth at which a sched entity is present in the hierarchy */
-static inline int depth_se(struct sched_entity *se)
-{
- int depth = 0;
-
- for_each_sched_entity(se)
- depth++;
-
- return depth;
-}
-
static void
find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
@@ -333,8 +322,8 @@ find_matching_se(struct sched_entity **s
*/
/* First walk up until both entities are at same depth */
- se_depth = depth_se(*se);
- pse_depth = depth_se(*pse);
+ se_depth = (*se)->depth;
+ pse_depth = (*pse)->depth;
while (se_depth > pse_depth) {
se_depth--;
@@ -399,10 +388,10 @@ static inline void list_del_leaf_cfs_rq(
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
-static inline int
+static inline struct cfs_rq *
is_same_group(struct sched_entity *se, struct sched_entity *pse)
{
- return 1;
+ return cfs_rq_of(se); /* always the same rq */
}
static inline struct sched_entity *parent_entity(struct sched_entity *se)
@@ -5246,6 +5235,8 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_FAIR_GROUP_SCHED
static void task_move_group_fair(struct task_struct *p, int on_rq)
{
+ struct sched_entity *se = &p->se;
+
/*
* If the task was not on the rq at the time of this cgroup movement
* it must have been asleep, sleeping tasks keep their ->vruntime
@@ -5271,14 +5262,16 @@ static void task_move_group_fair(struct
* To prevent boost or penalty in the new cfs_rq caused by delta
* min_vruntime between the two cfs_rqs, we skip vruntime adjustment.
*/
- if (!on_rq && (!p->se.sum_exec_runtime || p->state == TASK_WAKING))
+ if (!on_rq && (!se->sum_exec_runtime || p->state == TASK_WAKING))
on_rq = 1;
if (!on_rq)
- p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
+ se->vruntime -= cfs_rq_of(se)->min_vruntime;
set_task_rq(p, task_cpu(p));
+ if (se->parent)
+ se->depth = se->parent->depth + 1;
if (!on_rq)
- p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
+ se->vruntime += cfs_rq_of(se)->min_vruntime;
}
void free_fair_sched_group(struct task_group *tg)
@@ -5376,10 +5369,13 @@ void init_tg_cfs_entry(struct task_group
if (!se)
return;
- if (!parent)
+ if (!parent) {
se->cfs_rq = &rq->cfs;
- else
+ se->depth = 0;
+ } else {
se->cfs_rq = parent->my_q;
+ se->depth = parent->depth + 1;
+ }
se->my_q = cfs_rq;
update_load_set(&se->load, 0);
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 1/4] sched/fair: track cgroup depth Peter Zijlstra
@ 2012-06-14 13:29 ` Peter Zijlstra
2012-06-14 14:32 ` Steven Rostedt
` (2 more replies)
2012-06-14 13:29 ` [RFC][PATCH 3/4] sched/fair: clean up __clear_buddies_* Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair Peter Zijlstra
3 siblings, 3 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: peter_zijlstra-sched-optimize_cgroup_pick_next_task_fair_2.patch --]
[-- Type: text/plain, Size: 4971 bytes --]
In order to avoid having to do put/set on a whole cgroup hierarchy
when we context switch, push the put into pick_next_task() so that
both operations are in the same function. Further changes then allow
us to possibly optimize away redundant work.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 3 ++-
kernel/sched/core.c | 13 +++++++------
kernel/sched/fair.c | 6 +++++-
kernel/sched/idle_task.c | 6 +++++-
kernel/sched/rt.c | 27 ++++++++++++++++-----------
kernel/sched/stop_task.c | 9 +++++++--
6 files changed, 42 insertions(+), 22 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1093,7 +1093,8 @@ struct sched_class {
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
- struct task_struct * (*pick_next_task) (struct rq *rq);
+ struct task_struct * (*pick_next_task) (struct rq *rq,
+ struct task_struct *prev);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3176,7 +3176,7 @@ static void put_prev_task(struct rq *rq,
* Pick up the highest-prio task:
*/
static inline struct task_struct *
-pick_next_task(struct rq *rq)
+pick_next_task(struct rq *rq, struct task_struct *prev)
{
const struct sched_class *class;
struct task_struct *p;
@@ -3186,13 +3186,13 @@ pick_next_task(struct rq *rq)
* the fair class we can call that function directly:
*/
if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
- p = fair_sched_class.pick_next_task(rq);
+ p = fair_sched_class.pick_next_task(rq, prev);
if (likely(p))
return p;
}
for_each_class(class) {
- p = class->pick_next_task(rq);
+ p = class->pick_next_task(rq, prev);
if (p)
return p;
}
@@ -3253,8 +3253,9 @@ static void __sched __schedule(void)
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
- put_prev_task(rq, prev);
- next = pick_next_task(rq);
+ if (prev->on_rq || rq->skip_clock_update < 0)
+ update_rq_clock(rq);
+ next = pick_next_task(rq, prev);
clear_tsk_need_resched(prev);
rq->skip_clock_update = 0;
@@ -5200,7 +5201,7 @@ static void migrate_tasks(unsigned int d
if (rq->nr_running == 1)
break;
- next = pick_next_task(rq);
+ next = pick_next_task(rq, NULL);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2990,7 +2990,8 @@ static void check_preempt_wakeup(struct
set_last_buddy(se);
}
-static struct task_struct *pick_next_task_fair(struct rq *rq)
+static struct task_struct *
+pick_next_task_fair(struct rq *rq, struct task_struct *prev)
{
struct task_struct *p;
struct cfs_rq *cfs_rq = &rq->cfs;
@@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
if (!cfs_rq->nr_running)
return NULL;
+ if (prev)
+ prev->sched_class->put_prev_task(rq, prev);
+
do {
se = pick_next_entity(cfs_rq);
set_next_entity(cfs_rq, se);
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
resched_task(rq->idle);
}
-static struct task_struct *pick_next_task_idle(struct rq *rq)
+static struct task_struct *
+pick_next_task_idle(struct rq *rq, struct task_struct *prev)
{
+ if (prev)
+ prev->sched_class->put_prev_task(rq, prev);
+
schedstat_inc(rq, sched_goidle);
calc_load_account_idle(rq);
return rq->idle;
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1346,15 +1346,7 @@ static struct task_struct *_pick_next_ta
{
struct sched_rt_entity *rt_se;
struct task_struct *p;
- struct rt_rq *rt_rq;
-
- rt_rq = &rq->rt;
-
- if (!rt_rq->rt_nr_running)
- return NULL;
-
- if (rt_rq_throttled(rt_rq))
- return NULL;
+ struct rt_rq *rt_rq = &rq->rt;
do {
rt_se = pick_next_rt_entity(rq, rt_rq);
@@ -1368,9 +1360,22 @@ static struct task_struct *_pick_next_ta
return p;
}
-static struct task_struct *pick_next_task_rt(struct rq *rq)
+static struct task_struct *
+pick_next_task_rt(struct rq *rq, struct task_struct *prev)
{
- struct task_struct *p = _pick_next_task_rt(rq);
+ struct task_struct *p;
+ struct rt_rq *rt_rq = &rq->rt;
+
+ if (!rt_rq->rt_nr_running)
+ return NULL;
+
+ if (rt_rq_throttled(rt_rq))
+ return NULL;
+
+ if (prev)
+ prev->sched_class->put_prev_task(rq, prev);
+
+ p = _pick_next_task_rt(rq);
/* The running task is never eligible for pushing */
if (p)
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -23,12 +23,17 @@ check_preempt_curr_stop(struct rq *rq, s
/* we're never preempted */
}
-static struct task_struct *pick_next_task_stop(struct rq *rq)
+static struct task_struct *
+pick_next_task_stop(struct rq *rq, struct task_struct *prev)
{
struct task_struct *stop = rq->stop;
- if (stop && stop->on_rq)
+ if (stop && stop->on_rq) {
+ if (prev)
+ prev->sched_class->put_prev_task(rq, prev);
+
return stop;
+ }
return NULL;
}
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC][PATCH 3/4] sched/fair: clean up __clear_buddies_*
2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 1/4] sched/fair: track cgroup depth Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
@ 2012-06-14 13:29 ` Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair Peter Zijlstra
3 siblings, 0 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: peter_zijlstra-sched-optimize_cgroup_pick_next_task_fair_3.patch --]
[-- Type: text/plain, Size: 1079 bytes --]
Slightly easier code flow, no functional changes.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched/fair.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1115,10 +1115,10 @@ static void __clear_buddies_last(struct
{
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- if (cfs_rq->last == se)
- cfs_rq->last = NULL;
- else
+ if (cfs_rq->last != se)
break;
+
+ cfs_rq->last = NULL;
}
}
@@ -1126,10 +1126,10 @@ static void __clear_buddies_next(struct
{
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- if (cfs_rq->next == se)
- cfs_rq->next = NULL;
- else
+ if (cfs_rq->next != se)
break;
+
+ cfs_rq->next = NULL;
}
}
@@ -1137,10 +1137,10 @@ static void __clear_buddies_skip(struct
{
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- if (cfs_rq->skip == se)
- cfs_rq->skip = NULL;
- else
+ if (cfs_rq->skip != se)
break;
+
+ cfs_rq->skip = NULL;
}
}
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair
2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
` (2 preceding siblings ...)
2012-06-14 13:29 ` [RFC][PATCH 3/4] sched/fair: clean up __clear_buddies_* Peter Zijlstra
@ 2012-06-14 13:29 ` Peter Zijlstra
2012-06-14 21:19 ` Peter Zijlstra
3 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: peter_zijlstra-sched-optimize_cgroup_pick_next_task_fair_4.patch --]
[-- Type: text/plain, Size: 3335 bytes --]
Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
path") it is likely we pick a new task from the same cgroup, doing a put
and then set on all intermediate entities is a waste of time, so try to
avoid this.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched/fair.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 68 insertions(+), 7 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1285,15 +1285,46 @@ wakeup_preempt_entity(struct sched_entit
*/
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
- struct sched_entity *se = __pick_first_entity(cfs_rq);
- struct sched_entity *left = se;
+ struct sched_entity *left = __pick_first_entity(cfs_rq);
+ struct sched_entity *se, *curr = cfs_rq->curr;
+
+ /*
+ * Since its possible we got here without doing put_prev_entity() we
+ * also have to consider cfs_rq->curr. If it was set, and is still a
+ * runnable entity, update_curr() will update its vruntime, otherwise
+ * forget we've ever seen it.
+ */
+ if (curr) {
+ if (curr->on_rq)
+ update_curr(cfs_rq);
+ else
+ curr = NULL;
+ }
+
+ /*
+ * If curr is set we have to see if its left of the leftmost entity
+ * still in the tree, provided there was anything in the tree at all.
+ */
+ if (!left || (curr && entity_before(curr, left)))
+ left = curr;
+
+ se = left; /* ideally we run the leftmost entity */
/*
* Avoid running the skip buddy, if running something else can
* be done without getting too unfair.
*/
if (cfs_rq->skip == se) {
- struct sched_entity *second = __pick_next_entity(se);
+ struct sched_entity *second;
+
+ if (se == curr) {
+ second = __pick_first_entity(cfs_rq);
+ } else {
+ second = __pick_next_entity(se);
+ if (!second || (curr && entity_before(curr, second)))
+ second = curr;
+ }
+
if (second && wakeup_preempt_entity(second, left) < 1)
se = second;
}
@@ -2993,23 +3024,53 @@ static void check_preempt_wakeup(struct
static struct task_struct *
pick_next_task_fair(struct rq *rq, struct task_struct *prev)
{
- struct task_struct *p;
struct cfs_rq *cfs_rq = &rq->cfs;
- struct sched_entity *se;
+ struct sched_entity *se, *pse;
+ struct task_struct *p;
if (!cfs_rq->nr_running)
return NULL;
- if (prev)
+ if (prev && (prev->sched_class != &fair_sched_class)) {
prev->sched_class->put_prev_task(rq, prev);
+ prev = NULL;
+ }
do {
se = pick_next_entity(cfs_rq);
- set_next_entity(cfs_rq, se);
+ if (!prev)
+ set_next_entity(cfs_rq, se);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);
p = task_of(se);
+
+ /*
+ * If we haven't yet done put_prev_entity and the selected task is
+ * a different task than we started out with, try and touch the least
+ * amount of cfs_rq trees.
+ */
+ if (prev && prev != p) {
+ pse = &prev->se;
+
+ while (!(cfs_rq = is_same_group(se, pse))) {
+ int se_depth = se->depth;
+ int pse_depth = pse->depth;
+
+ if (se_depth <= pse_depth) {
+ put_prev_entity(cfs_rq_of(pse), pse);
+ pse = parent_entity(pse);
+ }
+ if (se_depth >= pse_depth) {
+ set_next_entity(cfs_rq_of(se), se);
+ se = parent_entity(se);
+ }
+ }
+
+ put_prev_entity(cfs_rq, pse);
+ set_next_entity(cfs_rq, se);
+ }
+
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p);
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
@ 2012-06-14 14:32 ` Steven Rostedt
2012-06-14 14:58 ` Peter Zijlstra
2012-06-15 9:49 ` Glauber Costa
2012-06-21 7:35 ` Michael Wang
2 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2012-06-14 14:32 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, glommer, linux-kernel
On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3176,7 +3176,7 @@ static void put_prev_task(struct rq *rq,
> * Pick up the highest-prio task:
> */
> static inline struct task_struct *
> -pick_next_task(struct rq *rq)
> +pick_next_task(struct rq *rq, struct task_struct *prev)
> {
> const struct sched_class *class;
> struct task_struct *p;
> @@ -3186,13 +3186,13 @@ pick_next_task(struct rq *rq)
> * the fair class we can call that function directly:
> */
> if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
> - p = fair_sched_class.pick_next_task(rq);
> + p = fair_sched_class.pick_next_task(rq, prev);
> if (likely(p))
> return p;
> }
>
> for_each_class(class) {
> - p = class->pick_next_task(rq);
> + p = class->pick_next_task(rq, prev);
> if (p)
> return p;
> }
> @@ -3253,8 +3253,9 @@ static void __sched __schedule(void)
> if (unlikely(!rq->nr_running))
> idle_balance(cpu, rq);
>
> - put_prev_task(rq, prev);
You remove the call to put_prev_task() which looks to be the only user
of this static function. But I don't see the deletion of this function.
-- Steve
> - next = pick_next_task(rq);
> + if (prev->on_rq || rq->skip_clock_update < 0)
> + update_rq_clock(rq);
> + next = pick_next_task(rq, prev);
> clear_tsk_need_resched(prev);
> rq->skip_clock_update = 0;
>
> @@ -5200,7 +5201,7 @@ static void migrate_tasks(unsigned int d
> if (rq->nr_running == 1)
> break;
>
> - next = pick_next_task(rq);
> + next = pick_next_task(rq, NULL);
> BUG_ON(!next);
> next->sched_class->put_prev_task(rq, next);
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-14 14:32 ` Steven Rostedt
@ 2012-06-14 14:58 ` Peter Zijlstra
0 siblings, 0 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 14:58 UTC (permalink / raw)
To: Steven Rostedt; +Cc: mingo, pjt, venki, efault, glommer, linux-kernel
On Thu, 2012-06-14 at 10:32 -0400, Steven Rostedt wrote:
>
> You remove the call to put_prev_task() which looks to be the only user
> of this static function. But I don't see the deletion of this
> function.
>
>
Right you are.. must've overlooked my compiler complaining about this.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair
2012-06-14 13:29 ` [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair Peter Zijlstra
@ 2012-06-14 21:19 ` Peter Zijlstra
2012-06-15 14:03 ` Peter Zijlstra
0 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 21:19 UTC (permalink / raw)
To: mingo; +Cc: pjt, venki, efault, rostedt, glommer, linux-kernel
On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:
> Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
> path") it is likely we pick a new task from the same cgroup, doing a put
> and then set on all intermediate entities is a waste of time, so try to
> avoid this.
I just noticed put_prev_entity() also does the bandwidth enforcement
stuff, I think I just broke that. Will have a peek at fixing that
tomorrow or so.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
2012-06-14 14:32 ` Steven Rostedt
@ 2012-06-15 9:49 ` Glauber Costa
2012-06-15 10:03 ` Peter Zijlstra
2012-06-21 7:35 ` Michael Wang
2 siblings, 1 reply; 16+ messages in thread
From: Glauber Costa @ 2012-06-15 9:49 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, rostedt, linux-kernel
On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
> +static struct task_struct *
> +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
> {
> - struct task_struct *p = _pick_next_task_rt(rq);
> + struct task_struct *p;
> + struct rt_rq *rt_rq =&rq->rt;
> +
> + if (!rt_rq->rt_nr_running)
> + return NULL;
> +
> + if (rt_rq_throttled(rt_rq))
> + return NULL;
> +
> + if (prev)
> + prev->sched_class->put_prev_task(rq, prev);
> +
it might be me, but this one sounds strange. If the responsibility of
putting the task now lies with pick_next_task(), you can't return NULL
without doing this first.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-15 10:03 ` Peter Zijlstra
@ 2012-06-15 10:01 ` Glauber Costa
0 siblings, 0 replies; 16+ messages in thread
From: Glauber Costa @ 2012-06-15 10:01 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, rostedt, linux-kernel
On 06/15/2012 02:03 PM, Peter Zijlstra wrote:
> On Fri, 2012-06-15 at 13:49 +0400, Glauber Costa wrote:
>> On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
>>> +static struct task_struct *
>>> +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
>>> {
>>> - struct task_struct *p = _pick_next_task_rt(rq);
>>> + struct task_struct *p;
>>> + struct rt_rq *rt_rq =&rq->rt;
>>> +
>>> + if (!rt_rq->rt_nr_running)
>>> + return NULL;
>>> +
>>> + if (rt_rq_throttled(rt_rq))
>>> + return NULL;
>>> +
>>> + if (prev)
>>> + prev->sched_class->put_prev_task(rq, prev);
>>> +
>> it might be me, but this one sounds strange. If the responsibility of
> putting the task now lies with pick_next_task(), you can't return NULL
>> without doing this first.
>
> Its the responsibility of the sched_class::pick_next_task implementation
> that will return the next task. Not of the first one called.
>
> Clearly this needs a comment.. /me adds.
hummmm, yeah, you are right. The comment does make it clearer.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-15 9:49 ` Glauber Costa
@ 2012-06-15 10:03 ` Peter Zijlstra
2012-06-15 10:01 ` Glauber Costa
0 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-15 10:03 UTC (permalink / raw)
To: Glauber Costa; +Cc: mingo, pjt, venki, efault, rostedt, linux-kernel
On Fri, 2012-06-15 at 13:49 +0400, Glauber Costa wrote:
> On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
> > +static struct task_struct *
> > +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
> > {
> > - struct task_struct *p = _pick_next_task_rt(rq);
> > + struct task_struct *p;
> > + struct rt_rq *rt_rq =&rq->rt;
> > +
> > + if (!rt_rq->rt_nr_running)
> > + return NULL;
> > +
> > + if (rt_rq_throttled(rt_rq))
> > + return NULL;
> > +
> > + if (prev)
> > + prev->sched_class->put_prev_task(rq, prev);
> > +
> it might be me, but this one sounds strange. If the responsibility of
> putting the task now lies with pick_next_task(), you can't return NULL
> without doing this first.
Its the responsibility of the sched_class::pick_next_task implementation
that will return the next task. Not of the first one called.
Clearly this needs a comment.. /me adds.
---
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1096,6 +1096,11 @@ struct sched_class {
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
+ /*
+ * It is the responsibility of the pick_next_task() method that will
+ * return the next task to call put_prev_task() on the @prev task or
+ * something equivalent.
+ */
struct task_struct * (*pick_next_task) (struct rq *rq,
struct task_struct *prev);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair
2012-06-14 21:19 ` Peter Zijlstra
@ 2012-06-15 14:03 ` Peter Zijlstra
2012-06-19 8:45 ` Paul Turner
0 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-15 14:03 UTC (permalink / raw)
To: mingo; +Cc: pjt, venki, efault, rostedt, glommer, linux-kernel
On Thu, 2012-06-14 at 23:19 +0200, Peter Zijlstra wrote:
> On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:
> > Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
> > path") it is likely we pick a new task from the same cgroup, doing a put
> > and then set on all intermediate entities is a waste of time, so try to
> > avoid this.
>
> I just noticed put_prev_entity() also does the bandwidth enforcement
> stuff, I think I just broke that. Will have a peek at fixing that
> tomorrow or so.
Damn, that's annoying, that wants to be done bottom-up, while we're now
doing a top-down selection. pjt any sane ideas?
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair
2012-06-15 14:03 ` Peter Zijlstra
@ 2012-06-19 8:45 ` Paul Turner
0 siblings, 0 replies; 16+ messages in thread
From: Paul Turner @ 2012-06-19 8:45 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, venki, efault, rostedt, glommer, linux-kernel
On Fri, Jun 15, 2012 at 7:03 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, 2012-06-14 at 23:19 +0200, Peter Zijlstra wrote:
>> On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:
>> > Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
>> > path") it is likely we pick a new task from the same cgroup, doing a put
>> > and then set on all intermediate entities is a waste of time, so try to
>> > avoid this.
>>
>> I just noticed put_prev_entity() also does the bandwidth enforcement
>> stuff, I think I just broke that. Will have a peek at fixing that
>> tomorrow or so.
>
> Damn, that's annoying, that wants to be done bottom-up, while we're now
> doing a top-down selection. pjt any sane ideas?
I'll have a look at this tomorrow, but I think this is fairly
immediately resolvable -- I don't see any immediate reason we need to
do full enforcement here. The only interesting thing we really need
to do in the put_path is the voluntary return of quota, which -- if we
need to do it -- we will get to do since that occurs iff the entity is
no longer runnable and actually getting a put().
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
2012-06-14 14:32 ` Steven Rostedt
2012-06-15 9:49 ` Glauber Costa
@ 2012-06-21 7:35 ` Michael Wang
2012-06-21 8:41 ` Peter Zijlstra
2 siblings, 1 reply; 16+ messages in thread
From: Michael Wang @ 2012-06-21 7:35 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, rostedt, glommer, linux-kernel
> +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
> {
> struct task_struct *p;
> struct cfs_rq *cfs_rq = &rq->cfs;
> @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
> if (!cfs_rq->nr_running)
> return NULL;
>
> + if (prev)
> + prev->sched_class->put_prev_task(rq, prev);
> +
> do {
> se = pick_next_entity(cfs_rq);
> set_next_entity(cfs_rq, se);
> --- a/kernel/sched/idle_task.c
> +++ b/kernel/sched/idle_task.c
> @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
> resched_task(rq->idle);
> }
Will it cause trouble when the last task on cfs_rq was dequeued before
being put?
The cfs_rq->nr_running will be 0 and we will miss the chance to put
prev, will we?
Regards,
Michael Wang
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-21 7:35 ` Michael Wang
@ 2012-06-21 8:41 ` Peter Zijlstra
2012-06-21 9:23 ` Michael Wang
0 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-21 8:41 UTC (permalink / raw)
To: Michael Wang; +Cc: mingo, pjt, venki, efault, rostedt, glommer, linux-kernel
On Thu, 2012-06-21 at 15:35 +0800, Michael Wang wrote:
> > +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
> > {
> > struct task_struct *p;
> > struct cfs_rq *cfs_rq = &rq->cfs;
> > @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
> > if (!cfs_rq->nr_running)
> > return NULL;
> >
> > + if (prev)
> > + prev->sched_class->put_prev_task(rq, prev);
> > +
> > do {
> > se = pick_next_entity(cfs_rq);
> > set_next_entity(cfs_rq, se);
> > --- a/kernel/sched/idle_task.c
> > +++ b/kernel/sched/idle_task.c
> > @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
> > resched_task(rq->idle);
> > }
>
> Will it cause trouble when the last task on cfs_rq was dequeued before
> being put?
>
> The cfs_rq->nr_running will be 0 and we will miss the chance to put
> prev, will we?
pick_next_task_idle() will then do the put, no?
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
2012-06-21 8:41 ` Peter Zijlstra
@ 2012-06-21 9:23 ` Michael Wang
0 siblings, 0 replies; 16+ messages in thread
From: Michael Wang @ 2012-06-21 9:23 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, rostedt, glommer, linux-kernel
On 06/21/2012 04:41 PM, Peter Zijlstra wrote:
> On Thu, 2012-06-21 at 15:35 +0800, Michael Wang wrote:
>>> +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
>>> {
>>> struct task_struct *p;
>>> struct cfs_rq *cfs_rq = &rq->cfs;
>>> @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
>>> if (!cfs_rq->nr_running)
>>> return NULL;
>>>
>>> + if (prev)
>>> + prev->sched_class->put_prev_task(rq, prev);
>>> +
>>> do {
>>> se = pick_next_entity(cfs_rq);
>>> set_next_entity(cfs_rq, se);
>>> --- a/kernel/sched/idle_task.c
>>> +++ b/kernel/sched/idle_task.c
>>> @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
>>> resched_task(rq->idle);
>>> }
>>
>> Will it cause trouble when the last task on cfs_rq was dequeued before
>> being put?
>>
>> The cfs_rq->nr_running will be 0 and we will miss the chance to put
>> prev, will we?
>
> pick_next_task_idle() will then do the put, no?
Oh, that's right, I see.
>
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2012-06-21 9:23 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 1/4] sched/fair: track cgroup depth Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
2012-06-14 14:32 ` Steven Rostedt
2012-06-14 14:58 ` Peter Zijlstra
2012-06-15 9:49 ` Glauber Costa
2012-06-15 10:03 ` Peter Zijlstra
2012-06-15 10:01 ` Glauber Costa
2012-06-21 7:35 ` Michael Wang
2012-06-21 8:41 ` Peter Zijlstra
2012-06-21 9:23 ` Michael Wang
2012-06-14 13:29 ` [RFC][PATCH 3/4] sched/fair: clean up __clear_buddies_* Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair Peter Zijlstra
2012-06-14 21:19 ` Peter Zijlstra
2012-06-15 14:03 ` Peter Zijlstra
2012-06-19 8:45 ` Paul Turner
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.