linux-kernel.vger.kernel.org archive mirror
* [PATCH 0/2] sched: make fair class handle rq/group changes by outside
@ 2015-10-05  9:16 byungchul.park
  2015-10-05  9:16 ` [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class byungchul.park
  2015-10-05  9:16 ` [PATCH 2/2] sched: make fair sched class can handle migration " byungchul.park
  0 siblings, 2 replies; 9+ messages in thread
From: byungchul.park @ 2015-10-05  9:16 UTC (permalink / raw)
  To: mingo, peterz; +Cc: linux-kernel, yuyang.du, pjt, efault, tglx, Byungchul Park

From: Byungchul Park <byungchul.park@lge.com>

Currently, the fair sched class handles neither a cgroup change nor an rq
migration that occurs while the task is in another sched class, e.g. the rt
class. This series makes it able to handle both.

Byungchul Park (2):
  sched: make fair sched class can handle the cgroup change by other
    class
  sched: make fair sched class can handle migration by other class

 include/linux/sched.h |    3 ++
 kernel/sched/core.c   |   34 +++++++++++----
 kernel/sched/fair.c   |  112 +++++++++++++++++++++++++++++++++++++++++--------
 kernel/sched/sched.h  |    6 ++-
 4 files changed, 127 insertions(+), 28 deletions(-)

-- 
1.7.9.5



* [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class
  2015-10-05  9:16 [PATCH 0/2] sched: make fair class handle rq/group changes by outside byungchul.park
@ 2015-10-05  9:16 ` byungchul.park
  2015-10-05  9:37   ` kbuild test robot
  2015-10-13  9:06   ` Peter Zijlstra
  2015-10-05  9:16 ` [PATCH 2/2] sched: make fair sched class can handle migration " byungchul.park
  1 sibling, 2 replies; 9+ messages in thread
From: byungchul.park @ 2015-10-05  9:16 UTC (permalink / raw)
  To: mingo, peterz; +Cc: linux-kernel, yuyang.du, pjt, efault, tglx, Byungchul Park

From: Byungchul Park <byungchul.park@lge.com>

The fair sched class can handle a cgroup change that occurs within its own
class via task_move_group_fair(), but it has no way of knowing about a change
that happens while the task is in another class. This patch makes the fair
sched class handle a cgroup change even when it happens in another sched
class.

Additionally, it makes sched_move_task() more flexible so that any other
sched class can easily add its own task_move_group_xx() callbacks in the
future, should it need them.

Signed-off-by: Byungchul Park <byungchul.park@lge.com>
---
 kernel/sched/core.c  |   17 +++++++++++------
 kernel/sched/fair.c  |   47 ++++++++++++++++++++++++++++++++++++++++-------
 kernel/sched/sched.h |    3 ++-
 3 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a91df61..41b735e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7686,6 +7686,7 @@ void sched_offline_group(struct task_group *tg)
  */
 void sched_move_task(struct task_struct *tsk)
 {
+	const struct sched_class *class;
 	struct task_group *tg;
 	int queued, running;
 	unsigned long flags;
@@ -7711,12 +7712,16 @@ void sched_move_task(struct task_struct *tsk)
 	tg = autogroup_task_group(tsk, tg);
 	tsk->sched_task_group = tg;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	if (tsk->sched_class->task_move_group)
-		tsk->sched_class->task_move_group(tsk);
-	else
-#endif
-		set_task_rq(tsk, task_cpu(tsk));
+
+	for_each_class(class) {
+		if (class->task_move_group_from)
+			class->task_move_group_from(tsk);
+	}
+	set_task_rq(tsk, task_cpu(tsk));
+	for_each_class(class) {
+		if (class->task_move_group_to)
+			class->task_move_group_to(tsk);
+	}
 
 	if (unlikely(running))
 		tsk->sched_class->set_curr_task(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 077076f..e8dabc5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8076,16 +8076,48 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void task_move_group_fair(struct task_struct *p)
+static void task_move_group_from_fair(struct task_struct *p)
 {
-	detach_task_cfs_rq(p);
-	set_task_rq(p, task_cpu(p));
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	if (p->sched_class == &fair_sched_class)
+		detach_task_cfs_rq(p);
+	else if (sched_feat(ATTACH_AGE_LOAD)) {
+		/*
+		 * Detaching was already done by switched_from_fair() when
+		 * the task got off from the fair_sched_class. But now that
+		 * the base cfs_rq for aging is about to be changed, if we
+		 * enabled ATTACH_AGE_LOAD then we perform the aging here
+		 * with the old cfs_rq, as the cfs_rq is not changed yet.
+		 */
+		__update_load_avg(cfs_rq->avg.last_update_time, task_cpu(p),
+				  &se->avg, 0, 0, NULL);
+	}
+}
+
+static void task_move_group_to_fair(struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
+	if (p->sched_class == &fair_sched_class) {
 #ifdef CONFIG_SMP
-	/* Tell se's cfs_rq has been changed -- migrated */
-	p->se.avg.last_update_time = 0;
+		/* Tell se's cfs_rq has been changed -- migrated */
+		se->avg.last_update_time = 0;
 #endif
-	attach_task_cfs_rq(p);
+		attach_task_cfs_rq(p);
+	}
+	else {
+		/*
+		 * Attaching will be done by switched_to_fair() when
+		 * the task is going to switch its sched_class to fair.
+		 * However, se->avg's last_update_time needs to be
+		 * updated here because the base cfs_rq for aging was
+		 * changed right now. (see sched_move_task().)
+		 */
+		se->avg.last_update_time = cfs_rq->avg.last_update_time;
+	}
 }
 
 void free_fair_sched_group(struct task_group *tg)
@@ -8305,7 +8337,8 @@ const struct sched_class fair_sched_class = {
 	.update_curr		= update_curr_fair,
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	.task_move_group	= task_move_group_fair,
+	.task_move_group_from	= task_move_group_from_fair,
+	.task_move_group_to	= task_move_group_to_fair,
 #endif
 };
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index af6f252..9432c66 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1220,7 +1220,8 @@ struct sched_class {
 	void (*update_curr) (struct rq *rq);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	void (*task_move_group) (struct task_struct *p);
+	void (*task_move_group_from) (struct task_struct *p);
+	void (*task_move_group_to) (struct task_struct *p);
 #endif
 };
 
-- 
1.7.9.5



* [PATCH 2/2] sched: make fair sched class can handle migration by other class
  2015-10-05  9:16 [PATCH 0/2] sched: make fair class handle rq/group changes by outside byungchul.park
  2015-10-05  9:16 ` [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class byungchul.park
@ 2015-10-05  9:16 ` byungchul.park
  1 sibling, 0 replies; 9+ messages in thread
From: byungchul.park @ 2015-10-05  9:16 UTC (permalink / raw)
  To: mingo, peterz; +Cc: linux-kernel, yuyang.du, pjt, efault, tglx, Byungchul Park

From: Byungchul Park <byungchul.park@lge.com>

The fair sched class can handle a migration that occurs within its own class
via migrate_task_rq_fair(), but it has no way of knowing about a migration
that happens while the task is in another class. This patch makes the fair
sched class handle a migration even when it happens in another sched class,
and it properly takes care of the case where the rq lock is not held.

Signed-off-by: Byungchul Park <byungchul.park@lge.com>
---
 include/linux/sched.h |    3 +++
 kernel/sched/core.c   |   17 +++++++++++--
 kernel/sched/fair.c   |   65 ++++++++++++++++++++++++++++++++++++++++---------
 kernel/sched/sched.h  |    3 ++-
 4 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 699228b..976cd9c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1235,6 +1235,9 @@ struct sched_entity {
 	struct list_head	group_node;
 	unsigned int		on_rq;
 
+	/* for indicating if a migration has happened. */
+	int			migrated;
+
 	u64			exec_start;
 	u64			sum_exec_runtime;
 	u64			vruntime;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 41b735e..d0c3c0b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1264,6 +1264,9 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
+	const struct sched_class *class;
+	int migrated = 0;
+
 #ifdef CONFIG_SCHED_DEBUG
 	/*
 	 * We should never call set_task_cpu() on a blocked task,
@@ -1291,13 +1294,23 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
-		if (p->sched_class->migrate_task_rq)
-			p->sched_class->migrate_task_rq(p, new_cpu);
+		for_each_class(class) {
+			if (class->migrate_task_rq_from)
+				class->migrate_task_rq_from(p, new_cpu);
+		}
+		migrated = 1;
 		p->se.nr_migrations++;
 		perf_event_task_migrate(p);
 	}
 
 	__set_task_cpu(p, new_cpu);
+
+	if (migrated) {
+		for_each_class(class) {
+			if (class->migrate_task_rq_to)
+				class->migrate_task_rq_to(p, new_cpu);
+		}
+	}
 }
 
 static void __migrate_swap_task(struct task_struct *p, int cpu)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e8dabc5..31ae787 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2775,9 +2775,9 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct sched_avg *sa = &se->avg;
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int migrated, decayed;
+	int decayed;
+	int migrated = xchg(&se->migrated, 0);
 
-	migrated = !sa->last_update_time;
 	if (!migrated) {
 		__update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
 			se->on_rq * scale_load_down(se->load.weight),
@@ -2808,11 +2808,8 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		max_t(s64,  cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
 }
 
-/*
- * Task first catches up with cfs_rq, and then subtract
- * itself from the cfs_rq (task must be off the queue now).
- */
-void remove_entity_load_avg(struct sched_entity *se)
+/* The caller only guarantees p->pi_lock is held. */
+static void __update_entity_load_avg(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 last_update_time;
@@ -2830,11 +2827,28 @@ void remove_entity_load_avg(struct sched_entity *se)
 #endif
 
 	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+}
+
+/* The caller only guarantees p->pi_lock is held. */
+static void __remove_entity_load_avg(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
 	atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
 	atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
 }
 
 /*
+ * Task first catches up with cfs_rq, and then subtract
+ * itself from the cfs_rq (task must be off the queue now).
+ */
+void remove_entity_load_avg(struct sched_entity *se)
+{
+	__update_entity_load_avg(se);
+	__remove_entity_load_avg(se);
+}
+
+/*
  * Update the rq's load with the elapsed running time before entering
  * idle. if the last scheduled task is not a CFS task, idle_enter will
  * be the only way to update the runnable statistic.
@@ -5009,7 +5023,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
  * previous cpu.  However, the caller only guarantees p->pi_lock is held; no
  * other assumptions, including the state of rq->lock, should be made.
  */
-static void migrate_task_rq_fair(struct task_struct *p, int next_cpu)
+static void migrate_task_rq_from_fair(struct task_struct *p, int next_cpu)
 {
 	/*
 	 * We are supposed to update the task to "current" time, then its up to date
@@ -5018,15 +5032,43 @@ static void migrate_task_rq_fair(struct task_struct *p, int next_cpu)
 	 * will result in the wakee task is less decayed, but giving the wakee more
 	 * load sounds not bad.
 	 */
-	remove_entity_load_avg(&p->se);
+	__update_entity_load_avg(&p->se);
+	if (p->sched_class == &fair_sched_class)
+		__remove_entity_load_avg(&p->se);
 
 	/* Tell new CPU we are migrated */
-	p->se.avg.last_update_time = 0;
+	p->se.migrated = 1;
 
 	/* We have migrated, no longer consider this task hot */
 	p->se.exec_start = 0;
 }
 
+/*
+ * Called immediately after a task is migrated to a new cpu; task_cpu(p) and
+ * cfs_rq_of(p) references at time of call identify the next cpu.  However,
+ * the caller only guarantees p->pi_lock is held; no other assumptions,
+ * including the state of rq->lock, should be made.
+ */
+static void migrate_task_rq_to_fair(struct task_struct *p, int next_cpu)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(&p->se);
+	u64 last_update_time;
+
+#ifndef CONFIG_64BIT
+	u64 last_update_time_copy;
+
+	do {
+		last_update_time_copy = cfs_rq->load_last_update_time_copy;
+		smp_rmb();
+		last_update_time = cfs_rq->avg.last_update_time;
+	} while (last_update_time != last_update_time_copy);
+#else
+	last_update_time = cfs_rq->avg.last_update_time;
+#endif
+
+	p->se.avg.last_update_time = last_update_time;
+}
+
 static void task_dead_fair(struct task_struct *p)
 {
 	remove_entity_load_avg(&p->se);
@@ -8314,7 +8356,8 @@ const struct sched_class fair_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_fair,
-	.migrate_task_rq	= migrate_task_rq_fair,
+	.migrate_task_rq_from	= migrate_task_rq_from_fair,
+	.migrate_task_rq_to	= migrate_task_rq_to_fair,
 
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9432c66..dd19c30 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1187,7 +1187,8 @@ struct sched_class {
 
 #ifdef CONFIG_SMP
 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
-	void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
+	void (*migrate_task_rq_from)(struct task_struct *p, int next_cpu);
+	void (*migrate_task_rq_to)(struct task_struct *p, int next_cpu);
 
 	void (*task_waking) (struct task_struct *task);
 	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
-- 
1.7.9.5



* Re: [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class
  2015-10-05  9:16 ` [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class byungchul.park
@ 2015-10-05  9:37   ` kbuild test robot
  2015-10-13  9:06   ` Peter Zijlstra
  1 sibling, 0 replies; 9+ messages in thread
From: kbuild test robot @ 2015-10-05  9:37 UTC (permalink / raw)
  To: byungchul.park
  Cc: kbuild-all, mingo, peterz, linux-kernel, yuyang.du, pjt, efault,
	tglx, Byungchul Park


Hi Byungchul,

[auto build test ERROR on next-20151002 -- if it's inappropriate base, please ignore]

config: i386-randconfig-x009-201540 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   kernel/sched/fair.c: In function 'task_move_group_from_fair':
>> kernel/sched/fair.c:8094:3: error: implicit declaration of function '__update_load_avg' [-Werror=implicit-function-declaration]
      __update_load_avg(cfs_rq->avg.last_update_time, task_cpu(p),
      ^
>> kernel/sched/fair.c:8094:27: error: 'struct cfs_rq' has no member named 'avg'
      __update_load_avg(cfs_rq->avg.last_update_time, task_cpu(p),
                              ^
>> kernel/sched/fair.c:8095:10: error: 'struct sched_entity' has no member named 'avg'
          &se->avg, 0, 0, NULL);
             ^
   kernel/sched/fair.c: In function 'task_move_group_to_fair':
   kernel/sched/fair.c:8119:5: error: 'struct sched_entity' has no member named 'avg'
      se->avg.last_update_time = cfs_rq->avg.last_update_time;
        ^
   kernel/sched/fair.c:8119:36: error: 'struct cfs_rq' has no member named 'avg'
      se->avg.last_update_time = cfs_rq->avg.last_update_time;
                                       ^
   cc1: some warnings being treated as errors

vim +/__update_load_avg +8094 kernel/sched/fair.c

  8088			 * Detaching was already done by switched_from_fair() when
  8089			 * the task got off from the fair_sched_class. But now that
  8090			 * the base cfs_rq for aging is about to be changed, if we
  8091			 * enabled ATTACH_AGE_LOAD then we perform the aging here
  8092			 * with the old cfs_rq, as the cfs_rq is not changed yet.
  8093			 */
> 8094			__update_load_avg(cfs_rq->avg.last_update_time, task_cpu(p),
> 8095					  &se->avg, 0, 0, NULL);
  8096		}
  8097	}
  8098	
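
The errors suggest the randconfig has CONFIG_FAIR_GROUP_SCHED=y but
CONFIG_SMP=n: in this tree, se->avg, cfs_rq->avg and __update_load_avg() only
exist under CONFIG_SMP. A minimal sketch of a guard that would address these
errors (an untested guess, not a fix posted in this thread) could look like:

	static void task_move_group_from_fair(struct task_struct *p)
	{
		if (p->sched_class == &fair_sched_class)
			detach_task_cfs_rq(p);
	#ifdef CONFIG_SMP
		else if (sched_feat(ATTACH_AGE_LOAD)) {
			struct sched_entity *se = &p->se;
			struct cfs_rq *cfs_rq = cfs_rq_of(se);

			/*
			 * Age the load against the old cfs_rq before
			 * set_task_rq() repoints it; se->avg and cfs_rq->avg
			 * only exist when CONFIG_SMP is set, hence the guard.
			 */
			__update_load_avg(cfs_rq->avg.last_update_time,
					  task_cpu(p), &se->avg, 0, 0, NULL);
		}
	#endif
	}

task_move_group_to_fair() would need the same kind of guard around its
se->avg / cfs_rq->avg accesses.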

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation



* Re: [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class
  2015-10-05  9:16 ` [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class byungchul.park
  2015-10-05  9:37   ` kbuild test robot
@ 2015-10-13  9:06   ` Peter Zijlstra
  2015-10-13 11:26     ` Byungchul Park
  1 sibling, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2015-10-13  9:06 UTC (permalink / raw)
  To: byungchul.park; +Cc: mingo, linux-kernel, yuyang.du, pjt, efault, tglx

On Mon, Oct 05, 2015 at 06:16:23PM +0900, byungchul.park@lge.com wrote:
> From: Byungchul Park <byungchul.park@lge.com>
> 
> Original fair sched class can handle the cgroup change occured within its
> class with task_move_group_fair(), but there is no way to know it if the
> change happened outside. This patch makes the fair sched class can handle
> the change of cgroup which happened even at other sched class.
> 
> Additionally, it makes sched_move_task() more flexable so that any other
> sched class can add task_move_group_xx() callback easily in future when
> it is needed.

I don't get the problem... when !fair, set_task_rq() will do what needs
doing.

The only reason we need task_move_group_fair() is the extra accounting
required when we actually _are_ of the fair class, it needs to
unaccount, move and reaccount.
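
For reference, the pre-patch helper being discussed -- reconstructed here from
the removed lines of the diff in patch 1/2, so read it as a sketch rather than
an authoritative listing -- is roughly:

	static void task_move_group_fair(struct task_struct *p)
	{
		/* unaccount: fold the task's load out of the old cfs_rq */
		detach_task_cfs_rq(p);
		/* move: point se->cfs_rq / se->parent at the new group */
		set_task_rq(p, task_cpu(p));
	#ifdef CONFIG_SMP
		/* Tell se's cfs_rq has been changed -- migrated */
		p->se.avg.last_update_time = 0;
	#endif
		/* reaccount: add the task's load to the new cfs_rq */
		attach_task_cfs_rq(p);
	}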

If we're not fair, the whole switched_from/to stuff should do that for
us, no?

So please explain the problem.


* Re: [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class
  2015-10-13  9:06   ` Peter Zijlstra
@ 2015-10-13 11:26     ` Byungchul Park
  2015-10-13 12:04       ` Peter Zijlstra
  0 siblings, 1 reply; 9+ messages in thread
From: Byungchul Park @ 2015-10-13 11:26 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, linux-kernel, yuyang.du, pjt, efault, tglx

On Tue, Oct 13, 2015 at 11:06:54AM +0200, Peter Zijlstra wrote:
> On Mon, Oct 05, 2015 at 06:16:23PM +0900, byungchul.park@lge.com wrote:
> > From: Byungchul Park <byungchul.park@lge.com>
> > 
> > Original fair sched class can handle the cgroup change occured within its
> > class with task_move_group_fair(), but there is no way to know it if the
> > change happened outside. This patch makes the fair sched class can handle
> > the change of cgroup which happened even at other sched class.
> > 
> > Additionally, it makes sched_move_task() more flexable so that any other
> > sched class can add task_move_group_xx() callback easily in future when
> > it is needed.
> 
> I don't get the problem... when !fair, set_task_rq() will do what needs
> doing.

set_task_rq() changes se's cfs_rq properly.

> 
> The only reason we need task_move_group_fair() is the extra accounting
> required when we actually _are_ of the fair class, it needs to
> unaccount, move and reaccount.

I mostly agree with you, but let's consider the following sequence:

1. switch se's class from fair to rt
2. change se's group within the rt class
3. switch se's class back to fair

Now se->avg.last_update_time holds a stale value that is not synced with the
current cfs_rq by the time attach_entity_load_avg() is called, so
ATTACH_AGE_LOAD won't work as expected. To be honest, there is no problem if
we disable ATTACH_AGE_LOAD, but I think ATTACH_AGE_LOAD is a valuable
feature, so I hope this patch is applied so that the ATTACH_AGE_LOAD feature
works properly.
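
To make the failure concrete, here is roughly what the ATTACH_AGE_LOAD path
in attach_entity_load_avg() looked like in mainline at the time -- a
simplified sketch for illustration, not the full function:

	static void attach_entity_load_avg(struct cfs_rq *cfs_rq,
					   struct sched_entity *se)
	{
		if (sched_feat(ATTACH_AGE_LOAD) && se->avg.last_update_time) {
			/*
			 * Age the entity's load from its last update up to the
			 * cfs_rq's clock.  After the fair -> rt -> group change
			 * -> fair sequence above, last_update_time still refers
			 * to the *old* cfs_rq's timebase, so this decay is
			 * computed against the wrong clock.
			 */
			__update_load_avg(cfs_rq->avg.last_update_time,
					  cpu_of(rq_of(cfs_rq)),
					  &se->avg, 0, 0, NULL);
		}
		se->avg.last_update_time = cfs_rq->avg.last_update_time;
		/* ... se's load/util are then added to cfs_rq->avg ... */
	}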

This patch can add a very small overhead when changing se's group, but I
think that kind of small overhead is reasonable because those events hardly
ever occur. In addition, please consider the similar problem solved in patch
2/2: a migration performed while in the rt class causes the same issue. I
also kept the code flexible so that each sched class can add its own
callback functions in the future when it needs them.

IMHO, it is clear that these two patches make ATTACH_AGE_LOAD work more
correctly, but if you think the overhead they introduce is not reasonable,
please let me know. I will follow your decision.

> 
> If we're not fair, the whole switched_from/to stuff should do that for
> us, no?
> 
> So please explain the problem.


* Re: [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class
  2015-10-13 11:26     ` Byungchul Park
@ 2015-10-13 12:04       ` Peter Zijlstra
  2015-10-13 12:07         ` Peter Zijlstra
  2015-10-13 23:58         ` Byungchul Park
  0 siblings, 2 replies; 9+ messages in thread
From: Peter Zijlstra @ 2015-10-13 12:04 UTC (permalink / raw)
  To: Byungchul Park; +Cc: mingo, linux-kernel, yuyang.du, pjt, efault, tglx

On Tue, Oct 13, 2015 at 08:26:45PM +0900, Byungchul Park wrote:
> On Tue, Oct 13, 2015 at 11:06:54AM +0200, Peter Zijlstra wrote:
> > On Mon, Oct 05, 2015 at 06:16:23PM +0900, byungchul.park@lge.com wrote:
> > > From: Byungchul Park <byungchul.park@lge.com>
> > > 
> > > Original fair sched class can handle the cgroup change occured within its
> > > class with task_move_group_fair(), but there is no way to know it if the
> > > change happened outside. This patch makes the fair sched class can handle
> > > the change of cgroup which happened even at other sched class.
> > > 
> > > Additionally, it makes sched_move_task() more flexable so that any other
> > > sched class can add task_move_group_xx() callback easily in future when
> > > it is needed.
> > 
> > I don't get the problem... when !fair, set_task_rq() will do what needs
> > doing.
> 
> set_task_rq() changes se's cfs_rq properly.
> 
> > 
> > The only reason we need task_move_group_fair() is the extra accounting
> > required when we actually _are_ of the fair class, it needs to
> > unaccount, move and reaccount.
> 
> i agree with you mostly. but let's consider following sequence.
> 
> 1. switch se's class from fair to rt
> 2. change se's group within the rt class
> 3. switch se's class back to fair
> 
> now, se->avg.last_update_time has a wrong value which is not synced with
> the current cfs_rq yet before calling attach_entity_load_avg(). so
> ATTACH_AGE_LOAD won't work expectedly. to be honest with you, no problem
> if we disable ATTACH_AGE_LOAD. but i think ATTACH_AGE_LOAD is a valuable
> feature, so i hope this patch will be added so that the ATTACH_AGE_LOAD
> feature works properly.

Ah, see, details like that make or break a Changelog. Since you've clearly
thought about it, you might as well write it down and save me the trouble of
trying to puzzle it out on me own ;-)

OK, now that I understand the problem, let me consider it a bit.

One alternative solution would be to make set_task_rq() do the clear,
right?

---
 kernel/sched/fair.c  | 8 --------
 kernel/sched/sched.h | 4 ++++
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 700eb548315f..9469f023ed74 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5020,9 +5020,6 @@ static void migrate_task_rq_fair(struct task_struct *p)
 	 */
 	remove_entity_load_avg(&p->se);
 
-	/* Tell new CPU we are migrated */
-	p->se.avg.last_update_time = 0;
-
 	/* We have migrated, no longer consider this task hot */
 	p->se.exec_start = 0;
 }
@@ -8080,11 +8077,6 @@ static void task_move_group_fair(struct task_struct *p)
 {
 	detach_task_cfs_rq(p);
 	set_task_rq(p, task_cpu(p));
-
-#ifdef CONFIG_SMP
-	/* Tell se's cfs_rq has been changed -- migrated */
-	p->se.avg.last_update_time = 0;
-#endif
 	attach_task_cfs_rq(p);
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index efd3bfc7e347..f5c39cb83ee5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -935,6 +935,10 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	p->se.cfs_rq = tg->cfs_rq[cpu];
 	p->se.parent = tg->se[cpu];
+#ifdef CONFIG_SMP
+	/* Tell se's cfs_rq has been changed -- migrated */
+	p->se.avg.last_update_time = 0;
+#endif
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED


* Re: [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class
  2015-10-13 12:04       ` Peter Zijlstra
@ 2015-10-13 12:07         ` Peter Zijlstra
  2015-10-13 23:58         ` Byungchul Park
  1 sibling, 0 replies; 9+ messages in thread
From: Peter Zijlstra @ 2015-10-13 12:07 UTC (permalink / raw)
  To: Byungchul Park; +Cc: mingo, linux-kernel, yuyang.du, pjt, efault, tglx

On Tue, Oct 13, 2015 at 02:04:38PM +0200, Peter Zijlstra wrote:
> On Tue, Oct 13, 2015 at 08:26:45PM +0900, Byungchul Park wrote:
> > On Tue, Oct 13, 2015 at 11:06:54AM +0200, Peter Zijlstra wrote:
> > > On Mon, Oct 05, 2015 at 06:16:23PM +0900, byungchul.park@lge.com wrote:
> > > > From: Byungchul Park <byungchul.park@lge.com>
> > > > 
> > > > Original fair sched class can handle the cgroup change occured within its
> > > > class with task_move_group_fair(), but there is no way to know it if the
> > > > change happened outside. This patch makes the fair sched class can handle
> > > > the change of cgroup which happened even at other sched class.
> > > > 
> > > > Additionally, it makes sched_move_task() more flexable so that any other
> > > > sched class can add task_move_group_xx() callback easily in future when
> > > > it is needed.
> > > 
> > > I don't get the problem... when !fair, set_task_rq() will do what needs
> > > doing.
> > 
> > set_task_rq() changes se's cfs_rq properly.
> > 
> > > 
> > > The only reason we need task_move_group_fair() is the extra accounting
> > > required when we actually _are_ of the fair class, it needs to
> > > unaccount, move and reaccount.
> > 
> > i agree with you mostly. but let's consider following sequence.
> > 
> > 1. switch se's class from fair to rt
> > 2. change se's group within the rt class
> > 3. switch se's class back to fair
> > 
> > now, se->avg.last_update_time has a wrong value which is not synced with
> > the current cfs_rq yet before calling attach_entity_load_avg(). so
> > ATTACH_AGE_LOAD won't work expectedly. to be honest with you, no problem
> > if we disable ATTACH_AGE_LOAD. but i think ATTACH_AGE_LOAD is a valuable
> > feature, so i hope this patch will be added so that the ATTACH_AGE_LOAD
> > feature works properly.
> 
> Ah, see details like that make or break a Changelog, since you've
> clearly thought about it, you might as well write it down and safe me
> the trouble of trying to puzzle it out on me own ;-)
> 
> OK, now that I understand the problem, let me consider it a bit.

So the problem I have with your approach is that I would prefer to
isolate the classes as much as possible. If it's not currently of a
class, we should not call into it.

Now, there's a few exceptions to that already, but I would really prefer
not to make it worse.


* Re: [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class
  2015-10-13 12:04       ` Peter Zijlstra
  2015-10-13 12:07         ` Peter Zijlstra
@ 2015-10-13 23:58         ` Byungchul Park
  1 sibling, 0 replies; 9+ messages in thread
From: Byungchul Park @ 2015-10-13 23:58 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, linux-kernel, yuyang.du, pjt, efault, tglx

On Tue, Oct 13, 2015 at 02:04:38PM +0200, Peter Zijlstra wrote:
> On Tue, Oct 13, 2015 at 08:26:45PM +0900, Byungchul Park wrote:
> > On Tue, Oct 13, 2015 at 11:06:54AM +0200, Peter Zijlstra wrote:
> > > On Mon, Oct 05, 2015 at 06:16:23PM +0900, byungchul.park@lge.com wrote:
> > > > From: Byungchul Park <byungchul.park@lge.com>
> > > > 
> > > > Original fair sched class can handle the cgroup change occured within its
> > > > class with task_move_group_fair(), but there is no way to know it if the
> > > > change happened outside. This patch makes the fair sched class can handle
> > > > the change of cgroup which happened even at other sched class.
> > > > 
> > > > Additionally, it makes sched_move_task() more flexable so that any other
> > > > sched class can add task_move_group_xx() callback easily in future when
> > > > it is needed.
> > > 
> > > I don't get the problem... when !fair, set_task_rq() will do what needs
> > > doing.
> > 
> > set_task_rq() changes se's cfs_rq properly.
> > 
> > > 
> > > The only reason we need task_move_group_fair() is the extra accounting
> > > required when we actually _are_ of the fair class, it needs to
> > > unaccount, move and reaccount.
> > 
> > i agree with you mostly. but let's consider following sequence.
> > 
> > 1. switch se's class from fair to rt
> > 2. change se's group within the rt class
> > 3. switch se's class back to fair
> > 
> > now, se->avg.last_update_time has a wrong value which is not synced with
> > the current cfs_rq yet before calling attach_entity_load_avg(). so
> > ATTACH_AGE_LOAD won't work expectedly. to be honest with you, no problem
> > if we disable ATTACH_AGE_LOAD. but i think ATTACH_AGE_LOAD is a valuable
> > feature, so i hope this patch will be added so that the ATTACH_AGE_LOAD
> > feature works properly.
> 
> Ah, see details like that make or break a Changelog, since you've
> clearly thought about it, you might as well write it down and safe me
> the trouble of trying to puzzle it out on me own ;-)

I am sorry for that. I will try to add more description to my patches in
the future.

> 
> OK, now that I understand the problem, let me consider it a bit.
> 
> One alternative solution would be to make set_task_rq() do the clear,
> right?

Yes. I re-implemented it last night, just in my head, and fortunately it is
similar to what you recommend. But I am not sure it is a good idea to reset
p->se.avg.last_update_time unconditionally for every caller of
set_task_rq(). Let me think about it some more.

Thank you,
Byungchul

> 
> ---
>  kernel/sched/fair.c  | 8 --------
>  kernel/sched/sched.h | 4 ++++
>  2 files changed, 4 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 700eb548315f..9469f023ed74 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5020,9 +5020,6 @@ static void migrate_task_rq_fair(struct task_struct *p)
>  	 */
>  	remove_entity_load_avg(&p->se);
>  
> -	/* Tell new CPU we are migrated */
> -	p->se.avg.last_update_time = 0;
> -
>  	/* We have migrated, no longer consider this task hot */
>  	p->se.exec_start = 0;
>  }
> @@ -8080,11 +8077,6 @@ static void task_move_group_fair(struct task_struct *p)
>  {
>  	detach_task_cfs_rq(p);
>  	set_task_rq(p, task_cpu(p));
> -
> -#ifdef CONFIG_SMP
> -	/* Tell se's cfs_rq has been changed -- migrated */
> -	p->se.avg.last_update_time = 0;
> -#endif
>  	attach_task_cfs_rq(p);
>  }
>  
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index efd3bfc7e347..f5c39cb83ee5 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -935,6 +935,10 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>  	p->se.cfs_rq = tg->cfs_rq[cpu];
>  	p->se.parent = tg->se[cpu];
> +#ifdef CONFIG_SMP
> +	/* Tell se's cfs_rq has been changed -- migrated */
> +	p->se.avg.last_update_time = 0;
> +#endif
>  #endif
>  
>  #ifdef CONFIG_RT_GROUP_SCHED


Thread overview: 9+ messages
2015-10-05  9:16 [PATCH 0/2] sched: make fair class handle rq/group changes by outside byungchul.park
2015-10-05  9:16 ` [PATCH 1/2] sched: make fair sched class can handle the cgroup change by other class byungchul.park
2015-10-05  9:37   ` kbuild test robot
2015-10-13  9:06   ` Peter Zijlstra
2015-10-13 11:26     ` Byungchul Park
2015-10-13 12:04       ` Peter Zijlstra
2015-10-13 12:07         ` Peter Zijlstra
2015-10-13 23:58         ` Byungchul Park
2015-10-05  9:16 ` [PATCH 2/2] sched: make fair sched class can handle migration " byungchul.park
