linux-kernel.vger.kernel.org archive mirror
* [RFC][PATCH 0/4] sched: Optimize cgroup muck
@ 2012-06-14 13:29 Peter Zijlstra
  2012-06-14 13:29 ` [RFC][PATCH 1/4] sched/fair: track cgroup depth Peter Zijlstra
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
  To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel

Hi, I finally got some time to make this stuff work (it boots!).

I haven't actually run any benchmarks yet, but various people expressed
interest in folding put_prev_task() into pick_next_task().

Please give it a spin.



* [RFC][PATCH 1/4] sched/fair: track cgroup depth
  2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
@ 2012-06-14 13:29 ` Peter Zijlstra
  2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
  To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel, Peter Zijlstra

[-- Attachment #1: peter_zijlstra-sched-optimize_cgroup_pick_next_task_fair_1.patch --]
[-- Type: text/plain, Size: 3778 bytes --]

Track each sched entity's depth in the cgroup tree; this is useful for
things like find_matching_se(), where you need to walk up to a common
parent of two sched entities.

Caching the depth avoids having to compute it on the spot, which saves
a number of potential cache misses.
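
To illustrate, here is a minimal userspace sketch of the walk this
enables. The types are cut-down stand-ins, not the kernel's, and the
real find_matching_se() matches entities by cfs_rq via is_same_group()
rather than by parent pointer; only the depth-based climbing is the
point:

    #include <stdio.h>
    #include <stddef.h>

    /* Stand-in for the kernel's sched_entity; only what the walk needs. */
    struct sched_entity {
        int depth;                      /* cached depth, 0 at the root */
        struct sched_entity *parent;    /* NULL at the root */
    };

    /*
     * Walk both entities up until they are siblings under a common
     * parent, using the cached depth instead of first counting it with
     * an extra walk per entity (what the removed depth_se() used to do).
     */
    static void find_matching_se(struct sched_entity **se,
                                 struct sched_entity **pse)
    {
        while ((*se)->depth > (*pse)->depth)
            *se = (*se)->parent;
        while ((*pse)->depth > (*se)->depth)
            *pse = (*pse)->parent;
        while ((*se)->parent != (*pse)->parent) {
            *se = (*se)->parent;
            *pse = (*pse)->parent;
        }
    }

    int main(void)
    {
        struct sched_entity root = { 0, NULL };
        struct sched_entity a  = { 1, &root }, b  = { 1, &root };
        struct sched_entity a1 = { 2, &a },    b1 = { 2, &b };
        struct sched_entity b2 = { 3, &b1 };
        struct sched_entity *se = &a1, *pse = &b2;

        find_matching_se(&se, &pse);
        printf("matched at depth %d\n", se->depth);  /* prints 1 */
        return 0;
    }

With ->depth cached, the two counting walks the old depth_se() did per
invocation simply disappear.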

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/sched.h |    1 +
 kernel/sched/fair.c   |   42 +++++++++++++++++++-----------------------
 2 files changed, 20 insertions(+), 23 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1186,6 +1186,7 @@ struct sched_entity {
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+	int			depth;
 	struct sched_entity	*parent;
 	/* rq on which this entity is (to be) queued: */
 	struct cfs_rq		*cfs_rq;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -295,13 +295,13 @@ static inline void list_del_leaf_cfs_rq(
 	list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
 
 /* Do the two (enqueued) entities belong to the same group ? */
-static inline int
+static inline struct cfs_rq *
 is_same_group(struct sched_entity *se, struct sched_entity *pse)
 {
 	if (se->cfs_rq == pse->cfs_rq)
-		return 1;
+		return se->cfs_rq;
 
-	return 0;
+	return NULL;
 }
 
 static inline struct sched_entity *parent_entity(struct sched_entity *se)
@@ -309,17 +309,6 @@ static inline struct sched_entity *paren
 	return se->parent;
 }
 
-/* return depth at which a sched entity is present in the hierarchy */
-static inline int depth_se(struct sched_entity *se)
-{
-	int depth = 0;
-
-	for_each_sched_entity(se)
-		depth++;
-
-	return depth;
-}
-
 static void
 find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 {
@@ -333,8 +322,8 @@ find_matching_se(struct sched_entity **s
 	 */
 
 	/* First walk up until both entities are at same depth */
-	se_depth = depth_se(*se);
-	pse_depth = depth_se(*pse);
+	se_depth = (*se)->depth;
+	pse_depth = (*pse)->depth;
 
 	while (se_depth > pse_depth) {
 		se_depth--;
@@ -399,10 +388,10 @@ static inline void list_del_leaf_cfs_rq(
 #define for_each_leaf_cfs_rq(rq, cfs_rq) \
 		for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
 
-static inline int
+static inline struct cfs_rq *
 is_same_group(struct sched_entity *se, struct sched_entity *pse)
 {
-	return 1;
+	return cfs_rq_of(se); /* always the same rq */
 }
 
 static inline struct sched_entity *parent_entity(struct sched_entity *se)
@@ -5246,6 +5235,8 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void task_move_group_fair(struct task_struct *p, int on_rq)
 {
+	struct sched_entity *se = &p->se;
+
 	/*
 	 * If the task was not on the rq at the time of this cgroup movement
 	 * it must have been asleep, sleeping tasks keep their ->vruntime
@@ -5271,14 +5262,16 @@ static void task_move_group_fair(struct 
 	 * To prevent boost or penalty in the new cfs_rq caused by delta
 	 * min_vruntime between the two cfs_rqs, we skip vruntime adjustment.
 	 */
-	if (!on_rq && (!p->se.sum_exec_runtime || p->state == TASK_WAKING))
+	if (!on_rq && (!se->sum_exec_runtime || p->state == TASK_WAKING))
 		on_rq = 1;
 
 	if (!on_rq)
-		p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
+		se->vruntime -= cfs_rq_of(se)->min_vruntime;
 	set_task_rq(p, task_cpu(p));
+	if (se->parent)
+		se->depth = se->parent->depth + 1;
 	if (!on_rq)
-		p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
+		se->vruntime += cfs_rq_of(se)->min_vruntime;
 }
 
 void free_fair_sched_group(struct task_group *tg)
@@ -5376,10 +5369,13 @@ void init_tg_cfs_entry(struct task_group
 	if (!se)
 		return;
 
-	if (!parent)
+	if (!parent) {
 		se->cfs_rq = &rq->cfs;
-	else
+		se->depth = 0;
+	} else {
 		se->cfs_rq = parent->my_q;
+		se->depth = parent->depth + 1;
+	}
 
 	se->my_q = cfs_rq;
 	update_load_set(&se->load, 0);




* [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
  2012-06-14 13:29 ` [RFC][PATCH 1/4] sched/fair: track cgroup depth Peter Zijlstra
@ 2012-06-14 13:29 ` Peter Zijlstra
  2012-06-14 14:32   ` Steven Rostedt
                     ` (2 more replies)
  2012-06-14 13:29 ` [RFC][PATCH 3/4] sched/fair: clean up __clear_buddies_* Peter Zijlstra
  2012-06-14 13:29 ` [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair Peter Zijlstra
  3 siblings, 3 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
  To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel, Peter Zijlstra

[-- Attachment #1: peter_zijlstra-sched-optimize_cgroup_pick_next_task_fair_2.patch --]
[-- Type: text/plain, Size: 4971 bytes --]

In order to avoid having to do put/set on a whole cgroup hierarchy
when we context switch, push the put into pick_next_task() so that
both operations are in the same function. Further changes then allow
us to possibly optimize away redundant work.
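
As an aside, a toy userspace model of the resulting calling convention
(all names here are illustrative stand-ins for the kernel's): only the
class that actually returns the next task puts prev, and a class that
returns NULL must leave prev untouched for a lower class to put.

    #include <stdio.h>
    #include <stddef.h>

    struct task { const char *name; };

    /* Toy runqueue: at most one runnable task per "class". */
    struct rq {
        struct task *rt_task;
        struct task *fair_task;
    };

    static void put_prev_task(struct rq *rq, struct task *prev)
    {
        (void)rq;
        printf("put %s\n", prev->name);
    }

    /* Bails without touching prev when it has nothing to run. */
    static struct task *pick_next_task_rt(struct rq *rq, struct task *prev)
    {
        if (!rq->rt_task)
            return NULL;
        if (prev)
            put_prev_task(rq, prev);    /* put and pick sit together */
        return rq->rt_task;
    }

    static struct task *pick_next_task_fair(struct rq *rq, struct task *prev)
    {
        if (!rq->fair_task)
            return NULL;
        if (prev)
            put_prev_task(rq, prev);
        return rq->fair_task;
    }

    /* Mirrors core.c's pick_next_task() loop: classes in priority order. */
    static struct task *pick_next_task(struct rq *rq, struct task *prev)
    {
        struct task *p;

        if ((p = pick_next_task_rt(rq, prev)))
            return p;
        if ((p = pick_next_task_fair(rq, prev)))
            return p;
        return NULL;
    }

    int main(void)
    {
        struct task prev = { "prev" }, t = { "fair-task" };
        struct rq rq = { NULL, &t };

        /* Prints "put prev" then "next: fair-task". */
        printf("next: %s\n", pick_next_task(&rq, &prev)->name);
        return 0;
    }

In the kernel the idle class terminates the loop the same way: its pick
always returns a task, so prev gets put exactly once.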

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/sched.h    |    3 ++-
 kernel/sched/core.c      |   13 +++++++------
 kernel/sched/fair.c      |    6 +++++-
 kernel/sched/idle_task.c |    6 +++++-
 kernel/sched/rt.c        |   27 ++++++++++++++++-----------
 kernel/sched/stop_task.c |    9 +++++++--
 6 files changed, 42 insertions(+), 22 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1093,7 +1093,8 @@ struct sched_class {
 
 	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
 
-	struct task_struct * (*pick_next_task) (struct rq *rq);
+	struct task_struct * (*pick_next_task) (struct rq *rq,
+						struct task_struct *prev);
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3176,7 +3176,7 @@ static void put_prev_task(struct rq *rq,
  * Pick up the highest-prio task:
  */
 static inline struct task_struct *
-pick_next_task(struct rq *rq)
+pick_next_task(struct rq *rq, struct task_struct *prev)
 {
 	const struct sched_class *class;
 	struct task_struct *p;
@@ -3186,13 +3186,13 @@ pick_next_task(struct rq *rq)
 	 * the fair class we can call that function directly:
 	 */
 	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
-		p = fair_sched_class.pick_next_task(rq);
+		p = fair_sched_class.pick_next_task(rq, prev);
 		if (likely(p))
 			return p;
 	}
 
 	for_each_class(class) {
-		p = class->pick_next_task(rq);
+		p = class->pick_next_task(rq, prev);
 		if (p)
 			return p;
 	}
@@ -3253,8 +3253,9 @@ static void __sched __schedule(void)
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
-	put_prev_task(rq, prev);
-	next = pick_next_task(rq);
+	if (prev->on_rq || rq->skip_clock_update < 0)
+		update_rq_clock(rq);
+	next = pick_next_task(rq, prev);
 	clear_tsk_need_resched(prev);
 	rq->skip_clock_update = 0;
 
@@ -5200,7 +5201,7 @@ static void migrate_tasks(unsigned int d
 		if (rq->nr_running == 1)
 			break;
 
-		next = pick_next_task(rq);
+		next = pick_next_task(rq, NULL);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2990,7 +2990,8 @@ static void check_preempt_wakeup(struct 
 		set_last_buddy(se);
 }
 
-static struct task_struct *pick_next_task_fair(struct rq *rq)
+static struct task_struct *
+pick_next_task_fair(struct rq *rq, struct task_struct *prev)
 {
 	struct task_struct *p;
 	struct cfs_rq *cfs_rq = &rq->cfs;
@@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
 	if (!cfs_rq->nr_running)
 		return NULL;
 
+	if (prev)
+		prev->sched_class->put_prev_task(rq, prev);
+
 	do {
 		se = pick_next_entity(cfs_rq);
 		set_next_entity(cfs_rq, se);
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
 	resched_task(rq->idle);
 }
 
-static struct task_struct *pick_next_task_idle(struct rq *rq)
+static struct task_struct *
+pick_next_task_idle(struct rq *rq, struct task_struct *prev)
 {
+	if (prev)
+		prev->sched_class->put_prev_task(rq, prev);
+
 	schedstat_inc(rq, sched_goidle);
 	calc_load_account_idle(rq);
 	return rq->idle;
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1346,15 +1346,7 @@ static struct task_struct *_pick_next_ta
 {
 	struct sched_rt_entity *rt_se;
 	struct task_struct *p;
-	struct rt_rq *rt_rq;
-
-	rt_rq = &rq->rt;
-
-	if (!rt_rq->rt_nr_running)
-		return NULL;
-
-	if (rt_rq_throttled(rt_rq))
-		return NULL;
+	struct rt_rq *rt_rq  = &rq->rt;
 
 	do {
 		rt_se = pick_next_rt_entity(rq, rt_rq);
@@ -1368,9 +1360,22 @@ static struct task_struct *_pick_next_ta
 	return p;
 }
 
-static struct task_struct *pick_next_task_rt(struct rq *rq)
+static struct task_struct *
+pick_next_task_rt(struct rq *rq, struct task_struct *prev)
 {
-	struct task_struct *p = _pick_next_task_rt(rq);
+	struct task_struct *p;
+	struct rt_rq *rt_rq = &rq->rt;
+
+	if (!rt_rq->rt_nr_running)
+		return NULL;
+
+	if (rt_rq_throttled(rt_rq))
+		return NULL;
+
+	if (prev)
+		prev->sched_class->put_prev_task(rq, prev);
+
+	p = _pick_next_task_rt(rq);
 
 	/* The running task is never eligible for pushing */
 	if (p)
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -23,12 +23,17 @@ check_preempt_curr_stop(struct rq *rq, s
 	/* we're never preempted */
 }
 
-static struct task_struct *pick_next_task_stop(struct rq *rq)
+static struct task_struct *
+pick_next_task_stop(struct rq *rq, struct task_struct *prev)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->on_rq)
+	if (stop && stop->on_rq) {
+		if (prev)
+			prev->sched_class->put_prev_task(rq, prev);
+
 		return stop;
+	}
 
 	return NULL;
 }




* [RFC][PATCH 3/4] sched/fair: clean up __clear_buddies_*
  2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
  2012-06-14 13:29 ` [RFC][PATCH 1/4] sched/fair: track cgroup depth Peter Zijlstra
  2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
@ 2012-06-14 13:29 ` Peter Zijlstra
  2012-06-14 13:29 ` [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair Peter Zijlstra
  3 siblings, 0 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
  To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel, Peter Zijlstra

[-- Attachment #1: peter_zijlstra-sched-optimize_cgroup_pick_next_task_fair_3.patch --]
[-- Type: text/plain, Size: 1079 bytes --]

Slightly easier code flow; no functional changes.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched/fair.c |   18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1115,10 +1115,10 @@ static void __clear_buddies_last(struct 
 {
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
-		if (cfs_rq->last == se)
-			cfs_rq->last = NULL;
-		else
+		if (cfs_rq->last != se)
 			break;
+
+		cfs_rq->last = NULL;
 	}
 }
 
@@ -1126,10 +1126,10 @@ static void __clear_buddies_next(struct 
 {
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
-		if (cfs_rq->next == se)
-			cfs_rq->next = NULL;
-		else
+		if (cfs_rq->next != se)
 			break;
+
+		cfs_rq->next = NULL;
 	}
 }
 
@@ -1137,10 +1137,10 @@ static void __clear_buddies_skip(struct 
 {
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
-		if (cfs_rq->skip == se)
-			cfs_rq->skip = NULL;
-		else
+		if (cfs_rq->skip != se)
 			break;
+
+		cfs_rq->skip = NULL;
 	}
 }
 




* [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair
  2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
                   ` (2 preceding siblings ...)
  2012-06-14 13:29 ` [RFC][PATCH 3/4] sched/fair: clean up __clear_buddies_* Peter Zijlstra
@ 2012-06-14 13:29 ` Peter Zijlstra
  2012-06-14 21:19   ` Peter Zijlstra
  3 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 13:29 UTC (permalink / raw)
  To: mingo, pjt, venki, efault, rostedt, glommer; +Cc: linux-kernel, Peter Zijlstra

[-- Attachment #1: peter_zijlstra-sched-optimize_cgroup_pick_next_task_fair_4.patch --]
[-- Type: text/plain, Size: 3335 bytes --]

Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
path") it is likely we pick a new task from the same cgroup; doing a put
and then a set on all intermediate entities is a waste of time, so try
to avoid this.
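
A userspace sketch of the resulting walk (illustrative stand-ins; the
kernel matches levels via is_same_group() and cfs_rq ownership rather
than the toy group ids used here): only the levels above the point
where prev's and next's hierarchies diverge see a put/set.

    #include <stdio.h>
    #include <stddef.h>

    struct se {
        const char *name;
        int depth;
        struct se *parent;
        int group;      /* toy id for the cfs_rq this entity lives on */
    };

    static void put_prev_entity(struct se *e) { printf("put %s\n", e->name); }
    static void set_next_entity(struct se *e) { printf("set %s\n", e->name); }

    /* Put prev's chain and set next's chain only where they diverge. */
    static void switch_to(struct se *se, struct se *pse)
    {
        while (se->group != pse->group) {  /* is_same_group() stand-in */
            int sd = se->depth, pd = pse->depth;

            if (sd <= pd) {
                put_prev_entity(pse);
                pse = pse->parent;
            }
            if (sd >= pd) {
                set_next_entity(se);
                se = se->parent;
            }
        }
        put_prev_entity(pse);   /* the matching level still flips */
        set_next_entity(se);
    }

    int main(void)
    {
        /* root cfs_rq = group 0; cgroups A and B own groups 1 and 2 */
        struct se A = { "A", 0, NULL, 0 }, B = { "B", 0, NULL, 0 };
        struct se p = { "prev", 1, &A, 1 }, n = { "next", 1, &B, 2 };

        switch_to(&n, &p);  /* prints: put prev, set next, put A, set B */
        return 0;
    }

Had prev and next lived on the same cfs_rq, the loop body would never
run and only the two tasks themselves would be flipped.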

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched/fair.c |   75 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 7 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1285,15 +1285,46 @@ wakeup_preempt_entity(struct sched_entit
  */
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *se = __pick_first_entity(cfs_rq);
-	struct sched_entity *left = se;
+	struct sched_entity *left = __pick_first_entity(cfs_rq);
+	struct sched_entity *se, *curr = cfs_rq->curr;
+
+	/*
+	 * Since its possible we got here without doing put_prev_entity() we
+	 * also have to consider cfs_rq->curr. If it was set, and is still a
+	 * runnable entity, update_curr() will update its vruntime, otherwise
+	 * forget we've ever seen it.
+	 */
+	if (curr) {
+		if (curr->on_rq)
+			update_curr(cfs_rq);
+		else
+			curr = NULL;
+	}
+
+	/*
+	 * If curr is set we have to see if its left of the leftmost entity
+	 * still in the tree, provided there was anything in the tree at all.
+	 */
+	if (!left || (curr && entity_before(curr, left)))
+		left = curr;
+
+	se = left; /* ideally we run the leftmost entity */
 
 	/*
 	 * Avoid running the skip buddy, if running something else can
 	 * be done without getting too unfair.
 	 */
 	if (cfs_rq->skip == se) {
-		struct sched_entity *second = __pick_next_entity(se);
+		struct sched_entity *second;
+
+		if (se == curr) {
+			second = __pick_first_entity(cfs_rq);
+		} else {
+			second = __pick_next_entity(se);
+			if (!second || (curr && entity_before(curr, second)))
+				second = curr;
+		}
+
 		if (second && wakeup_preempt_entity(second, left) < 1)
 			se = second;
 	}
@@ -2993,23 +3024,53 @@ static void check_preempt_wakeup(struct 
 static struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev)
 {
-	struct task_struct *p;
 	struct cfs_rq *cfs_rq = &rq->cfs;
-	struct sched_entity *se;
+	struct sched_entity *se, *pse;
+	struct task_struct *p;
 
 	if (!cfs_rq->nr_running)
 		return NULL;
 
-	if (prev)
+	if (prev && (prev->sched_class != &fair_sched_class)) {
 		prev->sched_class->put_prev_task(rq, prev);
+		prev = NULL;
+	}
 
 	do {
 		se = pick_next_entity(cfs_rq);
-		set_next_entity(cfs_rq, se);
+		if (!prev)
+			set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
 
 	p = task_of(se);
+
+	/*
+	 * If we haven't yet done put_prev_entity and the selected task is
+	 * a different task than we started out with, try and touch the least
+	 * amount of cfs_rq trees.
+	 */
+	if (prev && prev != p) {
+		pse = &prev->se;
+
+		while (!(cfs_rq = is_same_group(se, pse))) {
+			int se_depth = se->depth;
+			int pse_depth = pse->depth;
+
+			if (se_depth <= pse_depth) {
+				put_prev_entity(cfs_rq_of(pse), pse);
+				pse = parent_entity(pse);
+			}
+			if (se_depth >= pse_depth) {
+				set_next_entity(cfs_rq_of(se), se);
+				se = parent_entity(se);
+			}
+		}
+
+		put_prev_entity(cfs_rq, pse);
+		set_next_entity(cfs_rq, se);
+	}
+
 	if (hrtick_enabled(rq))
 		hrtick_start_fair(rq, p);
 




* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
@ 2012-06-14 14:32   ` Steven Rostedt
  2012-06-14 14:58     ` Peter Zijlstra
  2012-06-15  9:49   ` Glauber Costa
  2012-06-21  7:35   ` Michael Wang
  2 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2012-06-14 14:32 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, glommer, linux-kernel

On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:


> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3176,7 +3176,7 @@ static void put_prev_task(struct rq *rq,
>   * Pick up the highest-prio task:
>   */
>  static inline struct task_struct *
> -pick_next_task(struct rq *rq)
> +pick_next_task(struct rq *rq, struct task_struct *prev)
>  {
>  	const struct sched_class *class;
>  	struct task_struct *p;
> @@ -3186,13 +3186,13 @@ pick_next_task(struct rq *rq)
>  	 * the fair class we can call that function directly:
>  	 */
>  	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
> -		p = fair_sched_class.pick_next_task(rq);
> +		p = fair_sched_class.pick_next_task(rq, prev);
>  		if (likely(p))
>  			return p;
>  	}
>  
>  	for_each_class(class) {
> -		p = class->pick_next_task(rq);
> +		p = class->pick_next_task(rq, prev);
>  		if (p)
>  			return p;
>  	}
> @@ -3253,8 +3253,9 @@ static void __sched __schedule(void)
>  	if (unlikely(!rq->nr_running))
>  		idle_balance(cpu, rq);
>  
> -	put_prev_task(rq, prev);

You remove the call to put_prev_task(), which looks to be the only user
of this static function. But I don't see the deletion of that function.

-- Steve

> -	next = pick_next_task(rq);
> +	if (prev->on_rq || rq->skip_clock_update < 0)
> +		update_rq_clock(rq);
> +	next = pick_next_task(rq, prev);
>  	clear_tsk_need_resched(prev);
>  	rq->skip_clock_update = 0;
>  
> @@ -5200,7 +5201,7 @@ static void migrate_tasks(unsigned int d
>  		if (rq->nr_running == 1)
>  			break;
>  
> -		next = pick_next_task(rq);
> +		next = pick_next_task(rq, NULL);
>  		BUG_ON(!next);
>  		next->sched_class->put_prev_task(rq, next);
>  




* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-14 14:32   ` Steven Rostedt
@ 2012-06-14 14:58     ` Peter Zijlstra
  0 siblings, 0 replies; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 14:58 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: mingo, pjt, venki, efault, glommer, linux-kernel

On Thu, 2012-06-14 at 10:32 -0400, Steven Rostedt wrote:
> 
> You remove the call to put_prev_task(), which looks to be the only user
> of this static function. But I don't see the deletion of that
> function.
> 
> 
Right you are... I must've overlooked the compiler complaining about this.


* Re: [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair
  2012-06-14 13:29 ` [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair Peter Zijlstra
@ 2012-06-14 21:19   ` Peter Zijlstra
  2012-06-15 14:03     ` Peter Zijlstra
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-14 21:19 UTC (permalink / raw)
  To: mingo; +Cc: pjt, venki, efault, rostedt, glommer, linux-kernel

On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:
> Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
> path") it is likely we pick a new task from the same cgroup; doing a put
> and then a set on all intermediate entities is a waste of time, so try
> to avoid this.

I just noticed put_prev_entity() also does the bandwidth enforcement
stuff; I think I just broke that. I'll have a peek at fixing it
tomorrow or so.


* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
  2012-06-14 14:32   ` Steven Rostedt
@ 2012-06-15  9:49   ` Glauber Costa
  2012-06-15 10:03     ` Peter Zijlstra
  2012-06-21  7:35   ` Michael Wang
  2 siblings, 1 reply; 16+ messages in thread
From: Glauber Costa @ 2012-06-15  9:49 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, rostedt, linux-kernel

On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
> +static struct task_struct *
> +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
>   {
> -	struct task_struct *p = _pick_next_task_rt(rq);
> +	struct task_struct *p;
> +	struct rt_rq *rt_rq =&rq->rt;
> +
> +	if (!rt_rq->rt_nr_running)
> +		return NULL;
> +
> +	if (rt_rq_throttled(rt_rq))
> +		return NULL;
> +
> +	if (prev)
> +		prev->sched_class->put_prev_task(rq, prev);
> +
It might be me, but this one sounds strange. If the responsibility of
putting the task now lies with pick_next_task(), you can't return NULL
without doing this first.




* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-15 10:03     ` Peter Zijlstra
@ 2012-06-15 10:01       ` Glauber Costa
  0 siblings, 0 replies; 16+ messages in thread
From: Glauber Costa @ 2012-06-15 10:01 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, rostedt, linux-kernel

On 06/15/2012 02:03 PM, Peter Zijlstra wrote:
> On Fri, 2012-06-15 at 13:49 +0400, Glauber Costa wrote:
>> On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
>>> +static struct task_struct *
>>> +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
>>>    {
>>> -	struct task_struct *p = _pick_next_task_rt(rq);
>>> +	struct task_struct *p;
>>> +	struct rt_rq *rt_rq =&rq->rt;
>>> +
>>> +	if (!rt_rq->rt_nr_running)
>>> +		return NULL;
>>> +
>>> +	if (rt_rq_throttled(rt_rq))
>>> +		return NULL;
>>> +
>>> +	if (prev)
>>> +		prev->sched_class->put_prev_task(rq, prev);
>>> +
>> It might be me, but this one sounds strange. If the responsibility of
>> putting the task now lies with pick_next_task(), you can't return NULL
>> without doing this first.
>
> It's the responsibility of the sched_class::pick_next_task implementation
> that will return the next task, not of the first one called.
>
> Clearly this needs a comment... /me adds.

hummmm, yeah, you are right. The comment does make it clearer.


* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-15  9:49   ` Glauber Costa
@ 2012-06-15 10:03     ` Peter Zijlstra
  2012-06-15 10:01       ` Glauber Costa
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-15 10:03 UTC (permalink / raw)
  To: Glauber Costa; +Cc: mingo, pjt, venki, efault, rostedt, linux-kernel

On Fri, 2012-06-15 at 13:49 +0400, Glauber Costa wrote:
> On 06/14/2012 05:29 PM, Peter Zijlstra wrote:
> > +static struct task_struct *
> > +pick_next_task_rt(struct rq *rq, struct task_struct *prev)
> >   {
> > -	struct task_struct *p = _pick_next_task_rt(rq);
> > +	struct task_struct *p;
> > +	struct rt_rq *rt_rq =&rq->rt;
> > +
> > +	if (!rt_rq->rt_nr_running)
> > +		return NULL;
> > +
> > +	if (rt_rq_throttled(rt_rq))
> > +		return NULL;
> > +
> > +	if (prev)
> > +		prev->sched_class->put_prev_task(rq, prev);
> > +
> It might be me, but this one sounds strange. If the responsibility of
> putting the task now lies with pick_next_task(), you can't return NULL
> without doing this first.

It's the responsibility of the sched_class::pick_next_task implementation
that will return the next task, not of the first one called.

Clearly this needs a comment... /me adds.

---
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1096,6 +1096,11 @@ struct sched_class {
 
 	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
 
+	/*
+	 * It is the responsibility of the pick_next_task() method that will
+	 * return the next task to call put_prev_task() on the @prev task or
+	 * something equivalent.
+	 */
 	struct task_struct * (*pick_next_task) (struct rq *rq,
 						struct task_struct *prev);
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);



* Re: [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair
  2012-06-14 21:19   ` Peter Zijlstra
@ 2012-06-15 14:03     ` Peter Zijlstra
  2012-06-19  8:45       ` Paul Turner
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-15 14:03 UTC (permalink / raw)
  To: mingo; +Cc: pjt, venki, efault, rostedt, glommer, linux-kernel

On Thu, 2012-06-14 at 23:19 +0200, Peter Zijlstra wrote:
> On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:
> > Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
> > path") it is likely we pick a new task from the same cgroup; doing a put
> > and then a set on all intermediate entities is a waste of time, so try
> > to avoid this.
> 
> I just noticed put_prev_entity() also does the bandwidth enforcement
> stuff; I think I just broke that. I'll have a peek at fixing it
> tomorrow or so.

Damn, that's annoying: that wants to be done bottom-up, while we're now
doing a top-down selection. pjt, any sane ideas?


* Re: [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair
  2012-06-15 14:03     ` Peter Zijlstra
@ 2012-06-19  8:45       ` Paul Turner
  0 siblings, 0 replies; 16+ messages in thread
From: Paul Turner @ 2012-06-19  8:45 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, venki, efault, rostedt, glommer, linux-kernel

On Fri, Jun 15, 2012 at 7:03 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, 2012-06-14 at 23:19 +0200, Peter Zijlstra wrote:
>> On Thu, 2012-06-14 at 15:29 +0200, Peter Zijlstra wrote:
>> > Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
>> > path") it is likely we pick a new task from the same cgroup; doing a put
>> > and then a set on all intermediate entities is a waste of time, so try
>> > to avoid this.
>>
>> I just noticed put_prev_entity() also does the bandwidth enforcement
>> stuff; I think I just broke that. I'll have a peek at fixing it
>> tomorrow or so.
>
> Damn, that's annoying, that wants to be done bottom-up, while we're now
> doing a top-down selection. pjt any sane ideas?

I'll have a look at this tomorrow, but I think this is fairly
immediately resolvable -- I don't see any immediate reason we need to
do full enforcement here. The only interesting thing we really need
to do on the put path is the voluntary return of quota, which -- if we
need to do it -- we will get to do, since that occurs iff the entity is
no longer runnable and actually getting a put().


* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
  2012-06-14 14:32   ` Steven Rostedt
  2012-06-15  9:49   ` Glauber Costa
@ 2012-06-21  7:35   ` Michael Wang
  2012-06-21  8:41     ` Peter Zijlstra
  2 siblings, 1 reply; 16+ messages in thread
From: Michael Wang @ 2012-06-21  7:35 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, rostedt, glommer, linux-kernel


> +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
>  {
>  	struct task_struct *p;
>  	struct cfs_rq *cfs_rq = &rq->cfs;
> @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
>  	if (!cfs_rq->nr_running)
>  		return NULL;
> 
> +	if (prev)
> +		prev->sched_class->put_prev_task(rq, prev);
> +
>  	do {
>  		se = pick_next_entity(cfs_rq);
>  		set_next_entity(cfs_rq, se);
> --- a/kernel/sched/idle_task.c
> +++ b/kernel/sched/idle_task.c
> @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
>  	resched_task(rq->idle);
>  }

Will it cause trouble when the last task on the cfs_rq is dequeued
before being put?

The cfs_rq->nr_running will be 0 and we will miss the chance to put
prev, won't we?

Regards,
Michael Wang



* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-21  7:35   ` Michael Wang
@ 2012-06-21  8:41     ` Peter Zijlstra
  2012-06-21  9:23       ` Michael Wang
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2012-06-21  8:41 UTC (permalink / raw)
  To: Michael Wang; +Cc: mingo, pjt, venki, efault, rostedt, glommer, linux-kernel

On Thu, 2012-06-21 at 15:35 +0800, Michael Wang wrote:
> > +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
> >  {
> >  	struct task_struct *p;
> >  	struct cfs_rq *cfs_rq = &rq->cfs;
> > @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
> >  	if (!cfs_rq->nr_running)
> >  		return NULL;
> > 
> > +	if (prev)
> > +		prev->sched_class->put_prev_task(rq, prev);
> > +
> >  	do {
> >  		se = pick_next_entity(cfs_rq);
> >  		set_next_entity(cfs_rq, se);
> > --- a/kernel/sched/idle_task.c
> > +++ b/kernel/sched/idle_task.c
> > @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
> >  	resched_task(rq->idle);
> >  }
> 
> Will it cause trouble when the last task on the cfs_rq is dequeued
> before being put?
> 
> The cfs_rq->nr_running will be 0 and we will miss the chance to put
> prev, won't we?

pick_next_task_idle() will then do the put, no?


* Re: [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task()
  2012-06-21  8:41     ` Peter Zijlstra
@ 2012-06-21  9:23       ` Michael Wang
  0 siblings, 0 replies; 16+ messages in thread
From: Michael Wang @ 2012-06-21  9:23 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, pjt, venki, efault, rostedt, glommer, linux-kernel

On 06/21/2012 04:41 PM, Peter Zijlstra wrote:

> On Thu, 2012-06-21 at 15:35 +0800, Michael Wang wrote:
>>> +pick_next_task_fair(struct rq *rq, struct task_struct *prev)
>>>  {
>>>  	struct task_struct *p;
>>>  	struct cfs_rq *cfs_rq = &rq->cfs;
>>> @@ -2999,6 +3000,9 @@ static struct task_struct *pick_next_tas
>>>  	if (!cfs_rq->nr_running)
>>>  		return NULL;
>>>
>>> +	if (prev)
>>> +		prev->sched_class->put_prev_task(rq, prev);
>>> +
>>>  	do {
>>>  		se = pick_next_entity(cfs_rq);
>>>  		set_next_entity(cfs_rq, se);
>>> --- a/kernel/sched/idle_task.c
>>> +++ b/kernel/sched/idle_task.c
>>> @@ -22,8 +22,12 @@ static void check_preempt_curr_idle(stru
>>>  	resched_task(rq->idle);
>>>  }
>>
>> Will it cause trouble when the last task on the cfs_rq is dequeued
>> before being put?
>>
>> The cfs_rq->nr_running will be 0 and we will miss the chance to put
>> prev, won't we?
> 
> pick_next_task_idle() will then do the put, no?

Oh, that's right, I see.

> 




end of thread

Thread overview: 16+ messages
2012-06-14 13:29 [RFC][PATCH 0/4] sched: Optimize cgroup muck Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 1/4] sched/fair: track cgroup depth Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 2/4] sched: Push put_prev_task() into pick_next_task() Peter Zijlstra
2012-06-14 14:32   ` Steven Rostedt
2012-06-14 14:58     ` Peter Zijlstra
2012-06-15  9:49   ` Glauber Costa
2012-06-15 10:03     ` Peter Zijlstra
2012-06-15 10:01       ` Glauber Costa
2012-06-21  7:35   ` Michael Wang
2012-06-21  8:41     ` Peter Zijlstra
2012-06-21  9:23       ` Michael Wang
2012-06-14 13:29 ` [RFC][PATCH 3/4] sched/fair: clean up __clear_buddies_* Peter Zijlstra
2012-06-14 13:29 ` [RFC][PATCH 4/4] sched/fair: Optimize cgroup pick_next_task_fair Peter Zijlstra
2012-06-14 21:19   ` Peter Zijlstra
2012-06-15 14:03     ` Peter Zijlstra
2012-06-19  8:45       ` Paul Turner
