linux-kernel.vger.kernel.org archive mirror
* [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
@ 2016-05-24  8:57 Vincent Guittot
  2016-05-24  9:55 ` Vincent Guittot
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Vincent Guittot @ 2016-05-24  8:57 UTC (permalink / raw)
  To: peterz, mingo, linux-kernel, pjt
  Cc: yuyang.du, dietmar.eggemann, bsegall, Morten.Rasmussen, Vincent Guittot

Ensure that the changes of the utilization of a sched_entity will be
reflected in the task_group hierarchy.

This patch tries another way than the flat utilization hierarchy proposal
to ensure the changes will be propagated down to the root cfs.

The way to compute the sched average metrics stays the same, so the
utilization only needs to be synced with the local cfs_rq timestamp.

Changes since v1:
- This patch needs the patch that fixes the issue with rq->leaf_cfs_rq_list,
  "sched: fix hierarchical order in rq->leaf_cfs_rq_list", in order to work
  correctly. I haven't sent them as a single patchset because the fix is
  independent of this one
- Merge some functions that are always used together
- During update of blocked load, ensure that the sched_entity is synced
  with the cfs_rq applying changes
- Fix an issue when task changes its cpu affinity

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/fair.c  | 168 ++++++++++++++++++++++++++++++++++++++++++++-------
 kernel/sched/sched.h |   1 +
 2 files changed, 147 insertions(+), 22 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 07f0f1b..2714e31 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2591,6 +2591,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
 
 	reweight_entity(cfs_rq_of(se), se, shares);
 }
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
 {
@@ -2817,6 +2818,28 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 	return decayed;
 }
 
+#ifndef CONFIG_64BIT
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
+	u64 last_update_time_copy;
+	u64 last_update_time;
+
+	do {
+		last_update_time_copy = cfs_rq->load_last_update_time_copy;
+		smp_rmb();
+		last_update_time = cfs_rq->avg.last_update_time;
+	} while (last_update_time != last_update_time_copy);
+
+	return last_update_time;
+}
+#else
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->avg.last_update_time;
+}
+#endif
+
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * Updating tg's load_avg is necessary before update_cfs_share (which is done)
@@ -2884,8 +2907,86 @@ void set_task_rq_fair(struct sched_entity *se,
 		se->avg.last_update_time = n_last_update_time;
 	}
 }
+
+/*
+ * Save how much utilization has just been added/removed on cfs rq so we can
+ * propagate it across the whole tg tree
+ */
+static void set_tg_cfs_rq_util(struct cfs_rq *cfs_rq, int delta)
+{
+	if (cfs_rq->tg == &root_task_group)
+		return;
+
+	cfs_rq->diff_util_avg += delta;
+}
+
+/* Take into account the change of the utilization of a child task group */
+static void update_tg_cfs_util(struct sched_entity *se, int blocked)
+{
+	int delta;
+	struct cfs_rq *cfs_rq;
+	long update_util_avg;
+	long last_update_time;
+	long old_util_avg;
+
+
+	/*
+	 * update_blocked_average will call this function for root cfs_rq
+	 * whose se is null. In this case just return
+	 */
+	if (!se)
+		return;
+
+	if (entity_is_task(se))
+		return 0;
+
+	/* Get sched_entity of cfs rq */
+	cfs_rq = group_cfs_rq(se);
+
+	update_util_avg = cfs_rq->diff_util_avg;
+
+	if (!update_util_avg)
+		return 0;
+
+	/* Clear pending changes */
+	cfs_rq->diff_util_avg = 0;
+
+	/* Add changes in sched_entity utilization */
+	old_util_avg = se->avg.util_avg;
+	se->avg.util_avg = max_t(long, se->avg.util_avg + update_util_avg, 0);
+	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+
+	/* Get parent cfs_rq */
+	cfs_rq = cfs_rq_of(se);
+
+	if (blocked) {
+		/*
+		 * blocked utilization has to be synchronized with its parent
+		 * cfs_rq's timestamp
+		 */
+		last_update_time = cfs_rq_last_update_time(cfs_rq);
+
+		__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)),
+			  &se->avg,
+			  se->on_rq * scale_load_down(se->load.weight),
+			  cfs_rq->curr == se, NULL);
+	}
+
+	delta = se->avg.util_avg - old_util_avg;
+
+	cfs_rq->avg.util_avg =  max_t(long, cfs_rq->avg.util_avg + delta, 0);
+	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+
+	set_tg_cfs_rq_util(cfs_rq, delta);
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
+
+static inline void set_tg_cfs_rq_util(struct cfs_rq *cfs_rq, int delta) {}
+
+static inline void update_tg_cfs_util(struct sched_entity *se, int sync) {}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
@@ -2925,6 +3026,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 {
 	struct sched_avg *sa = &cfs_rq->avg;
 	int decayed, removed_load = 0, removed_util = 0;
+	int old_util_avg = sa->util_avg;
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
@@ -2947,6 +3049,8 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 	smp_wmb();
 	cfs_rq->load_last_update_time_copy = sa->last_update_time;
 #endif
+	if (removed_util)
+		set_tg_cfs_rq_util(cfs_rq, sa->util_avg - old_util_avg);
 
 	if (update_freq && (decayed || removed_util))
 		cfs_rq_util_change(cfs_rq);
@@ -3000,6 +3104,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	cfs_rq->avg.util_avg += se->avg.util_avg;
 	cfs_rq->avg.util_sum += se->avg.util_sum;
 
+	set_tg_cfs_rq_util(cfs_rq, se->avg.util_avg);
+
 	cfs_rq_util_change(cfs_rq);
 }
 
@@ -3008,12 +3114,13 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
 			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
 			  cfs_rq->curr == se, NULL);
-
 	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
 	cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
 	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
 	cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
 
+	set_tg_cfs_rq_util(cfs_rq, -se->avg.util_avg);
+
 	cfs_rq_util_change(cfs_rq);
 }
 
@@ -3056,27 +3163,6 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		max_t(s64,  cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
 }
 
-#ifndef CONFIG_64BIT
-static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
-{
-	u64 last_update_time_copy;
-	u64 last_update_time;
-
-	do {
-		last_update_time_copy = cfs_rq->load_last_update_time_copy;
-		smp_rmb();
-		last_update_time = cfs_rq->avg.last_update_time;
-	} while (last_update_time != last_update_time_copy);
-
-	return last_update_time;
-}
-#else
-static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->avg.last_update_time;
-}
-#endif
-
 /*
  * Task first catches up with cfs_rq, and then subtract
  * itself from the cfs_rq (task must be off the queue now).
@@ -3287,6 +3373,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
+	update_tg_cfs_util(se, false);
 
 	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
@@ -3388,6 +3475,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	update_min_vruntime(cfs_rq);
 	update_cfs_shares(cfs_rq);
+	update_tg_cfs_util(se, false);
+
 }
 
 /*
@@ -3565,6 +3654,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 */
 	update_load_avg(curr, 1);
 	update_cfs_shares(cfs_rq);
+	update_tg_cfs_util(curr, false);
 
 #ifdef CONFIG_SCHED_HRTICK
 	/*
@@ -4438,6 +4528,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		update_load_avg(se, 1);
 		update_cfs_shares(cfs_rq);
+		update_tg_cfs_util(se, false);
 	}
 
 	if (!se)
@@ -4498,6 +4589,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		update_load_avg(se, 1);
 		update_cfs_shares(cfs_rq);
+		update_tg_cfs_util(se, false);
 	}
 
 	if (!se)
@@ -6276,6 +6368,8 @@ static void update_blocked_averages(int cpu)
 
 		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
 			update_tg_load_avg(cfs_rq, 0);
+		/* Propagate pending util changes to the parent */
+		update_tg_cfs_util(cfs_rq->tg->se[cpu], true);
 	}
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
@@ -8370,6 +8464,20 @@ static void detach_task_cfs_rq(struct task_struct *p)
 
 	/* Catch up with the cfs_rq and remove our load when we leave */
 	detach_entity_load_avg(cfs_rq, se);
+
+	/*
+	 * Propagate the detach across the tg tree to make it visible to the
+	 * root
+	 */
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+
+		update_load_avg(se, 1);
+		update_tg_cfs_util(se, false);
+	}
 }
 
 static void attach_task_cfs_rq(struct task_struct *p)
@@ -8399,8 +8507,21 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 
 static void switched_to_fair(struct rq *rq, struct task_struct *p)
 {
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq;
+
 	attach_task_cfs_rq(p);
 
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+
+		update_load_avg(se, 1);
+		update_tg_cfs_util(se, false);
+	}
+
 	if (task_on_rq_queued(p)) {
 		/*
 		 * We were most likely switched from sched_rt, so
@@ -8443,6 +8564,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 	atomic_long_set(&cfs_rq->removed_load_avg, 0);
 	atomic_long_set(&cfs_rq->removed_util_avg, 0);
 #endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	cfs_rq->diff_util_avg = 0;
+#endif
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9693fe9..808d1b1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -389,6 +389,7 @@ struct cfs_rq {
 	unsigned long runnable_load_avg;
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	unsigned long tg_load_avg_contrib;
+	long diff_util_avg;
 #endif
 	atomic_long_t removed_load_avg, removed_util_avg;
 #ifndef CONFIG_64BIT
-- 
1.9.1


* [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
  2016-05-24  8:57 [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization Vincent Guittot
@ 2016-05-24  9:55 ` Vincent Guittot
  2016-06-06 10:52   ` Dietmar Eggemann
  2016-06-01 12:54 ` Peter Zijlstra
  2016-06-01 13:36 ` Peter Zijlstra
  2 siblings, 1 reply; 9+ messages in thread
From: Vincent Guittot @ 2016-05-24  9:55 UTC (permalink / raw)
  To: peterz, mingo, linux-kernel, pjt
  Cc: yuyang.du, dietmar.eggemann, bsegall, Morten.Rasmussen, Vincent Guittot

Ensure that the changes of the utilization of a sched_entity will be
reflected in the task_group hierarchy.

This patch tries another way than the flat utilization hierarchy proposal
to ensure the changes will be propagated down to the root cfs.

The way to compute the sched average metrics stays the same, so the
utilization only needs to be synced with the local cfs_rq timestamp.

Changes since v1:
- This patch needs the patch that fixes the issue with rq->leaf_cfs_rq_list,
  "sched: fix hierarchical order in rq->leaf_cfs_rq_list", in order to work
  correctly. I haven't sent them as a single patchset because the fix is
  independent of this one
- Merge some functions that are always used together
- During update of blocked load, ensure that the sched_entity is synced
  with the cfs_rq applying changes
- Fix an issue when task changes its cpu affinity

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---

Hi,

Same version but rebased on latest sched/core

Regards,
Vincent

 kernel/sched/fair.c  | 168 ++++++++++++++++++++++++++++++++++++++++++++-------
 kernel/sched/sched.h |   1 +
 2 files changed, 147 insertions(+), 22 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6d3fbf2..19475e61 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2591,6 +2591,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
 
 	reweight_entity(cfs_rq_of(se), se, shares);
 }
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
 {
@@ -2818,6 +2819,28 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 	return decayed;
 }
 
+#ifndef CONFIG_64BIT
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
+	u64 last_update_time_copy;
+	u64 last_update_time;
+
+	do {
+		last_update_time_copy = cfs_rq->load_last_update_time_copy;
+		smp_rmb();
+		last_update_time = cfs_rq->avg.last_update_time;
+	} while (last_update_time != last_update_time_copy);
+
+	return last_update_time;
+}
+#else
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->avg.last_update_time;
+}
+#endif
+
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * Updating tg's load_avg is necessary before update_cfs_share (which is done)
@@ -2885,8 +2908,86 @@ void set_task_rq_fair(struct sched_entity *se,
 		se->avg.last_update_time = n_last_update_time;
 	}
 }
+
+/*
+ * Save how much utilization has just been added/removed on cfs rq so we can
+ * propagate it across the whole tg tree
+ */
+static void set_tg_cfs_rq_util(struct cfs_rq *cfs_rq, int delta)
+{
+	if (cfs_rq->tg == &root_task_group)
+		return;
+
+	cfs_rq->diff_util_avg += delta;
+}
+
+/* Take into account the change of the utilization of a child task group */
+static void update_tg_cfs_util(struct sched_entity *se, int blocked)
+{
+	int delta;
+	struct cfs_rq *cfs_rq;
+	long update_util_avg;
+	long last_update_time;
+	long old_util_avg;
+
+
+	/*
+	 * update_blocked_average will call this function for root cfs_rq
+	 * whose se is null. In this case just return
+	 */
+	if (!se)
+		return;
+
+	if (entity_is_task(se))
+		return 0;
+
+	/* Get sched_entity of cfs rq */
+	cfs_rq = group_cfs_rq(se);
+
+	update_util_avg = cfs_rq->diff_util_avg;
+
+	if (!update_util_avg)
+		return 0;
+
+	/* Clear pending changes */
+	cfs_rq->diff_util_avg = 0;
+
+	/* Add changes in sched_entity utilization */
+	old_util_avg = se->avg.util_avg;
+	se->avg.util_avg = max_t(long, se->avg.util_avg + update_util_avg, 0);
+	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+
+	/* Get parent cfs_rq */
+	cfs_rq = cfs_rq_of(se);
+
+	if (blocked) {
+		/*
+		 * blocked utilization has to be synchronized with its parent
+		 * cfs_rq's timestamp
+		 */
+		last_update_time = cfs_rq_last_update_time(cfs_rq);
+
+		__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)),
+			  &se->avg,
+			  se->on_rq * scale_load_down(se->load.weight),
+			  cfs_rq->curr == se, NULL);
+	}
+
+	delta = se->avg.util_avg - old_util_avg;
+
+	cfs_rq->avg.util_avg =  max_t(long, cfs_rq->avg.util_avg + delta, 0);
+	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+
+	set_tg_cfs_rq_util(cfs_rq, delta);
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
+
+static inline void set_tg_cfs_rq_util(struct cfs_rq *cfs_rq, int delta) {}
+
+static inline void update_tg_cfs_util(struct sched_entity *se, int sync) {}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
@@ -2926,6 +3027,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 {
 	struct sched_avg *sa = &cfs_rq->avg;
 	int decayed, removed_load = 0, removed_util = 0;
+	int old_util_avg = sa->util_avg;
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
@@ -2948,6 +3050,8 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 	smp_wmb();
 	cfs_rq->load_last_update_time_copy = sa->last_update_time;
 #endif
+	if (removed_util)
+		set_tg_cfs_rq_util(cfs_rq, sa->util_avg - old_util_avg);
 
 	if (update_freq && (decayed || removed_util))
 		cfs_rq_util_change(cfs_rq);
@@ -3001,6 +3105,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	cfs_rq->avg.util_avg += se->avg.util_avg;
 	cfs_rq->avg.util_sum += se->avg.util_sum;
 
+	set_tg_cfs_rq_util(cfs_rq, se->avg.util_avg);
+
 	cfs_rq_util_change(cfs_rq);
 }
 
@@ -3009,12 +3115,13 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
 			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
 			  cfs_rq->curr == se, NULL);
-
 	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
 	cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
 	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
 	cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
 
+	set_tg_cfs_rq_util(cfs_rq, -se->avg.util_avg);
+
 	cfs_rq_util_change(cfs_rq);
 }
 
@@ -3057,27 +3164,6 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		max_t(s64,  cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
 }
 
-#ifndef CONFIG_64BIT
-static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
-{
-	u64 last_update_time_copy;
-	u64 last_update_time;
-
-	do {
-		last_update_time_copy = cfs_rq->load_last_update_time_copy;
-		smp_rmb();
-		last_update_time = cfs_rq->avg.last_update_time;
-	} while (last_update_time != last_update_time_copy);
-
-	return last_update_time;
-}
-#else
-static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->avg.last_update_time;
-}
-#endif
-
 /*
  * Task first catches up with cfs_rq, and then subtract
  * itself from the cfs_rq (task must be off the queue now).
@@ -3328,6 +3414,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
+	update_tg_cfs_util(se, false);
 
 	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
@@ -3429,6 +3516,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	update_min_vruntime(cfs_rq);
 	update_cfs_shares(cfs_rq);
+	update_tg_cfs_util(se, false);
+
 }
 
 /*
@@ -3606,6 +3695,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 */
 	update_load_avg(curr, 1);
 	update_cfs_shares(cfs_rq);
+	update_tg_cfs_util(curr, false);
 
 #ifdef CONFIG_SCHED_HRTICK
 	/*
@@ -4479,6 +4569,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		update_load_avg(se, 1);
 		update_cfs_shares(cfs_rq);
+		update_tg_cfs_util(se, false);
 	}
 
 	if (!se)
@@ -4539,6 +4630,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		update_load_avg(se, 1);
 		update_cfs_shares(cfs_rq);
+		update_tg_cfs_util(se, false);
 	}
 
 	if (!se)
@@ -6328,6 +6420,8 @@ static void update_blocked_averages(int cpu)
 
 		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
 			update_tg_load_avg(cfs_rq, 0);
+		/* Propagate pending util changes to the parent */
+		update_tg_cfs_util(cfs_rq->tg->se[cpu], true);
 	}
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
@@ -8405,6 +8499,20 @@ static void detach_task_cfs_rq(struct task_struct *p)
 
 	/* Catch up with the cfs_rq and remove our load when we leave */
 	detach_entity_load_avg(cfs_rq, se);
+
+	/*
+	 * Propagate the detach across the tg tree to make it visible to the
+	 * root
+	 */
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+
+		update_load_avg(se, 1);
+		update_tg_cfs_util(se, false);
+	}
 }
 
 static void attach_task_cfs_rq(struct task_struct *p)
@@ -8434,8 +8542,21 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 
 static void switched_to_fair(struct rq *rq, struct task_struct *p)
 {
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq;
+
 	attach_task_cfs_rq(p);
 
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+
+		update_load_avg(se, 1);
+		update_tg_cfs_util(se, false);
+	}
+
 	if (task_on_rq_queued(p)) {
 		/*
 		 * We were most likely switched from sched_rt, so
@@ -8478,6 +8599,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 	atomic_long_set(&cfs_rq->removed_load_avg, 0);
 	atomic_long_set(&cfs_rq->removed_util_avg, 0);
 #endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	cfs_rq->diff_util_avg = 0;
+#endif
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 30750dc..3235ae4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -396,6 +396,7 @@ struct cfs_rq {
 	unsigned long runnable_load_avg;
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	unsigned long tg_load_avg_contrib;
+	long diff_util_avg;
 #endif
 	atomic_long_t removed_load_avg, removed_util_avg;
 #ifndef CONFIG_64BIT
-- 
1.9.1


* Re: [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
  2016-05-24  8:57 [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization Vincent Guittot
  2016-05-24  9:55 ` Vincent Guittot
@ 2016-06-01 12:54 ` Peter Zijlstra
  2016-06-01 19:45   ` Dietmar Eggemann
  2016-06-05 23:58   ` Yuyang Du
  2016-06-01 13:36 ` Peter Zijlstra
  2 siblings, 2 replies; 9+ messages in thread
From: Peter Zijlstra @ 2016-06-01 12:54 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: mingo, linux-kernel, pjt, yuyang.du, dietmar.eggemann, bsegall,
	Morten.Rasmussen

On Tue, May 24, 2016 at 10:57:32AM +0200, Vincent Guittot wrote:
> Ensure that the changes of the utilization of a sched_entity will be
> reflected in the task_group hierarchy.
> 
> This patch tries another way than the flat utilization hierarchy proposal
> to ensure the changes will be propagated down to the root cfs.

Which would be:

 lkml.kernel.org/r/1460327765-18024-5-git-send-email-yuyang.du@intel.com

Right? Yuyang, there were some issues with the patches leading up to
that proposal, were you going to update the flat util thing without
those patches or can you find yourself in Vince's patches?

(just so I can get a picture of what all patches to look at when
reviewing)


* Re: [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
  2016-05-24  8:57 [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization Vincent Guittot
  2016-05-24  9:55 ` Vincent Guittot
  2016-06-01 12:54 ` Peter Zijlstra
@ 2016-06-01 13:36 ` Peter Zijlstra
  2016-06-01 15:26   ` Vincent Guittot
  2 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2016-06-01 13:36 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: mingo, linux-kernel, pjt, yuyang.du, dietmar.eggemann, bsegall,
	Morten.Rasmussen

On Tue, May 24, 2016 at 10:57:32AM +0200, Vincent Guittot wrote:

> +/*
> + * Save how much utilization has just been added/removed on cfs rq so we can
> + * propagate it across the whole tg tree
> + */
> +static void set_tg_cfs_rq_util(struct cfs_rq *cfs_rq, int delta)
> +{
> +	if (cfs_rq->tg == &root_task_group)
> +		return;
> +
> +	cfs_rq->diff_util_avg += delta;
> +}

function name doesn't really reflect its purpose..

> +
> +/* Take into account the change of the utilization of a child task group */

This comment could be so much better... :-)

> +static void update_tg_cfs_util(struct sched_entity *se, int blocked)
> +{
> +	int delta;
> +	struct cfs_rq *cfs_rq;
> +	long update_util_avg;
> +	long last_update_time;
> +	long old_util_avg;
> +
> +
> +	/*
> +	 * update_blocked_average will call this function for root cfs_rq
> +	 * whose se is null. In this case just return
> +	 */
> +	if (!se)
> +		return;
> +
> +	if (entity_is_task(se))
> +		return 0;
> +
> +	/* Get sched_entity of cfs rq */
> +	cfs_rq = group_cfs_rq(se);
> +
> +	update_util_avg = cfs_rq->diff_util_avg;
> +
> +	if (!update_util_avg)
> +		return 0;
> +
> +	/* Clear pending changes */
> +	cfs_rq->diff_util_avg = 0;
> +
> +	/* Add changes in sched_entity utilization */
> +	old_util_avg = se->avg.util_avg;
> +	se->avg.util_avg = max_t(long, se->avg.util_avg + update_util_avg, 0);
> +	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
> +
> +	/* Get parent cfs_rq */
> +	cfs_rq = cfs_rq_of(se);
> +
> +	if (blocked) {
> +		/*
> +		 * blocked utilization has to be synchronized with its parent
> +		 * cfs_rq's timestamp
> +		 */
> +		last_update_time = cfs_rq_last_update_time(cfs_rq);
> +
> +		__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)),
> +			  &se->avg,
> +			  se->on_rq * scale_load_down(se->load.weight),
> +			  cfs_rq->curr == se, NULL);
> +	}
> +
> +	delta = se->avg.util_avg - old_util_avg;
> +
> +	cfs_rq->avg.util_avg =  max_t(long, cfs_rq->avg.util_avg + delta, 0);
> +	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
> +
> +	set_tg_cfs_rq_util(cfs_rq, delta);
> +}

So if I read this right it computes the utilization delta for group se's
and propagates it into the corresponding parent group cfs_rq ?
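
i.e., roughly, per group se (a simplified reading that ignores the clamping,
the util_sum updates, the blocked-time resync and the root_task_group check):

	/* child cfs_rq owned by this group se */
	delta = cfs_rq->diff_util_avg;
	cfs_rq->diff_util_avg = 0;

	/* apply the delta to the group se ... */
	se->avg.util_avg += delta;

	/* ... and to the parent cfs_rq, recording it for the next level up */
	cfs_rq_of(se)->avg.util_avg += delta;
	cfs_rq_of(se)->diff_util_avg += delta;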

> @@ -6276,6 +6368,8 @@ static void update_blocked_averages(int cpu)
>  
>  		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
>  			update_tg_load_avg(cfs_rq, 0);
> +		/* Propagate pending util changes to the parent */
> +		update_tg_cfs_util(cfs_rq->tg->se[cpu], true);

And this is why you need the strict bottom-up thing fixed..

> @@ -8370,6 +8464,20 @@ static void detach_task_cfs_rq(struct task_struct *p)
>  
>  	/* Catch up with the cfs_rq and remove our load when we leave */
>  	detach_entity_load_avg(cfs_rq, se);
> +
> +	/*
> +	 * Propagate the detach across the tg tree to make it visible to the
> +	 * root
> +	 */
> +	for_each_sched_entity(se) {
> +		cfs_rq = cfs_rq_of(se);
> +
> +		if (cfs_rq_throttled(cfs_rq))
> +			break;
> +
> +		update_load_avg(se, 1);
> +		update_tg_cfs_util(se, false);
> +	}
>  }
>  
>  static void attach_task_cfs_rq(struct task_struct *p)
> @@ -8399,8 +8507,21 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
>  
>  static void switched_to_fair(struct rq *rq, struct task_struct *p)
>  {
> +	struct sched_entity *se = &p->se;
> +	struct cfs_rq *cfs_rq;
> +
>  	attach_task_cfs_rq(p);
>  
> +	for_each_sched_entity(se) {
> +		cfs_rq = cfs_rq_of(se);
> +
> +		if (cfs_rq_throttled(cfs_rq))
> +			break;
> +
> +		update_load_avg(se, 1);
> +		update_tg_cfs_util(se, false);
> +	}
> +
>  	if (task_on_rq_queued(p)) {
>  		/*
>  		 * We were most likely switched from sched_rt, so

So I would expect this to be in attach_task_cfs_rq() to mirror the
detach_task_cfs_rq().

Also, this change is somewhat independent but required for the 'flat'
util measure, right?


* Re: [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
  2016-06-01 13:36 ` Peter Zijlstra
@ 2016-06-01 15:26   ` Vincent Guittot
  0 siblings, 0 replies; 9+ messages in thread
From: Vincent Guittot @ 2016-06-01 15:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, linux-kernel, Paul Turner, Yuyang Du,
	Dietmar Eggemann, Benjamin Segall, Morten Rasmussen

On 1 June 2016 at 15:36, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, May 24, 2016 at 10:57:32AM +0200, Vincent Guittot wrote:
>
>> +/*
>> + * Save how much utilization has just been added/removed on cfs rq so we can
>> + * propagate it across the whole tg tree
>> + */
>> +static void set_tg_cfs_rq_util(struct cfs_rq *cfs_rq, int delta)
>> +{
>> +     if (cfs_rq->tg == &root_task_group)
>> +             return;
>> +
>> +     cfs_rq->diff_util_avg += delta;
>> +}
>
> function name doesn't really reflect its purpose..

ok, I will change it

>
>> +
>> +/* Take into account the change of the utilization of a child task group */
>
> This comment could be so much better... :-)

yes, I'm going to add more details about what is done

>
>> +static void update_tg_cfs_util(struct sched_entity *se, int blocked)
>> +{
>> +     int delta;
>> +     struct cfs_rq *cfs_rq;
>> +     long update_util_avg;
>> +     long last_update_time;
>> +     long old_util_avg;
>> +
>> +
>> +     /*
>> +      * update_blocked_average will call this function for root cfs_rq
>> +      * whose se is null. In this case just return
>> +      */
>> +     if (!se)
>> +             return;
>> +
>> +     if (entity_is_task(se))
>> +             return 0;
>> +
>> +     /* Get sched_entity of cfs rq */
>> +     cfs_rq = group_cfs_rq(se);
>> +
>> +     update_util_avg = cfs_rq->diff_util_avg;
>> +
>> +     if (!update_util_avg)
>> +             return 0;
>> +
>> +     /* Clear pending changes */
>> +     cfs_rq->diff_util_avg = 0;
>> +
>> +     /* Add changes in sched_entity utilization */
>> +     old_util_avg = se->avg.util_avg;
>> +     se->avg.util_avg = max_t(long, se->avg.util_avg + update_util_avg, 0);
>> +     se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
>> +
>> +     /* Get parent cfs_rq */
>> +     cfs_rq = cfs_rq_of(se);
>> +
>> +     if (blocked) {
>> +             /*
>> +              * blocked utilization has to be synchronized with its parent
>> +              * cfs_rq's timestamp
>> +              */
>> +             last_update_time = cfs_rq_last_update_time(cfs_rq);
>> +
>> +             __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)),
>> +                       &se->avg,
>> +                       se->on_rq * scale_load_down(se->load.weight),
>> +                       cfs_rq->curr == se, NULL);
>> +     }
>> +
>> +     delta = se->avg.util_avg - old_util_avg;
>> +
>> +     cfs_rq->avg.util_avg =  max_t(long, cfs_rq->avg.util_avg + delta, 0);
>> +     cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
>> +
>> +     set_tg_cfs_rq_util(cfs_rq, delta);
>> +}
>
> So if I read this right it computes the utilization delta for group se's
> and propagates it into the corresponding parent group cfs_rq ?

yes

>
>> @@ -6276,6 +6368,8 @@ static void update_blocked_averages(int cpu)
>>
>>               if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
>>                       update_tg_load_avg(cfs_rq, 0);
>> +             /* Propagate pending util changes to the parent */
>> +             update_tg_cfs_util(cfs_rq->tg->se[cpu], true);
>
> And this is why you need the strict bottom-up thing fixed..

yes

>
>> @@ -8370,6 +8464,20 @@ static void detach_task_cfs_rq(struct task_struct *p)
>>
>>       /* Catch up with the cfs_rq and remove our load when we leave */
>>       detach_entity_load_avg(cfs_rq, se);
>> +
>> +     /*
>> +      * Propagate the detach across the tg tree to make it visible to the
>> +      * root
>> +      */
>> +     for_each_sched_entity(se) {
>> +             cfs_rq = cfs_rq_of(se);
>> +
>> +             if (cfs_rq_throttled(cfs_rq))
>> +                     break;
>> +
>> +             update_load_avg(se, 1);
>> +             update_tg_cfs_util(se, false);
>> +     }
>>  }
>>
>>  static void attach_task_cfs_rq(struct task_struct *p)
>> @@ -8399,8 +8507,21 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
>>
>>  static void switched_to_fair(struct rq *rq, struct task_struct *p)
>>  {
>> +     struct sched_entity *se = &p->se;
>> +     struct cfs_rq *cfs_rq;
>> +
>>       attach_task_cfs_rq(p);
>>
>> +     for_each_sched_entity(se) {
>> +             cfs_rq = cfs_rq_of(se);
>> +
>> +             if (cfs_rq_throttled(cfs_rq))
>> +                     break;
>> +
>> +             update_load_avg(se, 1);
>> +             update_tg_cfs_util(se, false);
>> +     }
>> +
>>       if (task_on_rq_queued(p)) {
>>               /*
>>                * We were most likely switched from sched_rt, so
>
> So I would expect this to be in attach_task_cfs_rq() to mirror the
> detach_task_cfs_rq().

yes. My goal was to rely on the enqueue to propagate the move during a
task_move_group, but it's better to place it in attach_task_cfs_rq so we
have the same sequence for attaching and detaching
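
A rough sketch of what that could look like (hypothetical placement, omitting
the existing depth/vruntime handling in attach_task_cfs_rq; the helpers are
the ones introduced by this patch, so the final version may differ):

static void attach_task_cfs_rq(struct task_struct *p)
{
	struct sched_entity *se = &p->se;
	struct cfs_rq *cfs_rq = cfs_rq_of(se);

	/* Synchronize the task with its cfs_rq as today */
	attach_entity_load_avg(cfs_rq, se);

	/* Then propagate the attach up the tg tree, mirroring detach_task_cfs_rq() */
	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);

		if (cfs_rq_throttled(cfs_rq))
			break;

		update_load_avg(se, 1);
		update_tg_cfs_util(se, false);
	}
}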

>
> Also, this change is somewhat independent but required for the 'flat'
> util measure, right?

I don't know if it's needed for the flat util measure, as the main interest
of the flat util measure is to not go through the tg tree


* Re: [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
  2016-06-01 12:54 ` Peter Zijlstra
@ 2016-06-01 19:45   ` Dietmar Eggemann
  2016-06-05 23:58   ` Yuyang Du
  1 sibling, 0 replies; 9+ messages in thread
From: Dietmar Eggemann @ 2016-06-01 19:45 UTC (permalink / raw)
  To: Peter Zijlstra, Vincent Guittot
  Cc: mingo, linux-kernel, pjt, yuyang.du, bsegall, Morten.Rasmussen

On 01/06/16 13:54, Peter Zijlstra wrote:
> On Tue, May 24, 2016 at 10:57:32AM +0200, Vincent Guittot wrote:
>> Ensure that the changes of the utilization of a sched_entity will be
>> reflected in the task_group hierarchy.
>>
>> This patch tries another way than the flat utilization hierarchy proposal
>> to ensure the changes will be propagated down to the root cfs.

IMHO, the 'flat utilization hierarchy' discussion started here:
https://lkml.org/lkml/2016/4/1/514

In the meantime I continued to play with the idea of a flat utilization
hierarchy based on the existing PELT code. I just sent out the RFC patch
set for people to compare with Vincent's approach.

> Which would be:
> 
>  lkml.kernel.org/r/1460327765-18024-5-git-send-email-yuyang.du@intel.com
> 
> Right? Yuyang, there were some issues with the patches leading up to
> that proposal, were you going to update the flat util thing without
> those patches or can you find yourself in Vince's patches?

I think Yuyang dropped the 'flat utilization hierarchy' in
https://lkml.org/lkml/2016/4/28/270

> (just so I can get a picture of what all patches to look at when
> reviewing)


* Re: [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
  2016-06-01 12:54 ` Peter Zijlstra
  2016-06-01 19:45   ` Dietmar Eggemann
@ 2016-06-05 23:58   ` Yuyang Du
  1 sibling, 0 replies; 9+ messages in thread
From: Yuyang Du @ 2016-06-05 23:58 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Vincent Guittot, mingo, linux-kernel, pjt, dietmar.eggemann,
	bsegall, Morten.Rasmussen

On Wed, Jun 01, 2016 at 02:54:07PM +0200, Peter Zijlstra wrote:
> On Tue, May 24, 2016 at 10:57:32AM +0200, Vincent Guittot wrote:
> > Ensure that the changes of the utilization of a sched_entity will be
> > reflected in the task_group hierarchy.
> > 
> > This patch tries another way than the flat utilization hierarchy proposal
> > to ensure the changes will be propagated down to the root cfs.
> 
> Which would be:
> 
>  lkml.kernel.org/r/1460327765-18024-5-git-send-email-yuyang.du@intel.com
> 
> Right? Yuyang, there were some issues with the patches leading up to
> that proposal, were you going to update the flat util thing without
> those patches or can you find yourself in Vince's patches?

Sure. I am rebasing my proposal patches, will send out later.
 
> (just so I can get a picture of what all patches to look at when
> reviewing)


* Re: [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
  2016-05-24  9:55 ` Vincent Guittot
@ 2016-06-06 10:52   ` Dietmar Eggemann
  2016-06-06 12:44     ` Vincent Guittot
  0 siblings, 1 reply; 9+ messages in thread
From: Dietmar Eggemann @ 2016-06-06 10:52 UTC (permalink / raw)
  To: Vincent Guittot, peterz, mingo, linux-kernel, pjt
  Cc: yuyang.du, bsegall, Morten.Rasmussen

On 24/05/16 10:55, Vincent Guittot wrote:

[...]

> +/* Take into account the change of the utilization of a child task group */
> +static void update_tg_cfs_util(struct sched_entity *se, int blocked)
> +{
> +	int delta;
> +	struct cfs_rq *cfs_rq;
> +	long update_util_avg;
> +	long last_update_time;
> +	long old_util_avg;
> +
> +
> +	/*
> +	 * update_blocked_average will call this function for root cfs_rq
> +	 * whose se is null. In this case just return
> +	 */
> +	if (!se)
> +		return;
> +
> +	if (entity_is_task(se))
> +		return 0;

void function

> +
> +	/* Get sched_entity of cfs rq */

You mean /* Get cfs_rq owned by this task group */?

> +	cfs_rq = group_cfs_rq(se);
> +
> +	update_util_avg = cfs_rq->diff_util_avg;
> +
> +	if (!update_util_avg)
> +		return 0;

Couldn't you just get rid of long update_util_avg and only use
cfs_rq->diff_util_avg here (clearing the pending changes after you set
se->avg.util_avg)?

> +
> +	/* Clear pending changes */
> +	cfs_rq->diff_util_avg = 0;
> +
> +	/* Add changes in sched_entity utilization */
> +	old_util_avg = se->avg.util_avg;
> +	se->avg.util_avg = max_t(long, se->avg.util_avg + update_util_avg, 0);
> +	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
> +
> +	/* Get parent cfs_rq */
> +	cfs_rq = cfs_rq_of(se);
> +
> +	if (blocked) {
> +		/*
> +		 * blocked utilization has to be synchronized with its parent
> +		 * cfs_rq's timestamp
> +		 */

We don't have stand-alone blocked utilization any more although the
function is still called update_blocked_averages(). Do you need this for
CPUs going through idle periods?
Is it also necessary because there could be other se's which could have
been [en|de]queued at/from this cfs_rq, so its last_update_time value is
more recent?

[...]


* Re: [RFC PATCH v2] sched: reflect sched_entity movement into task_group's utilization
  2016-06-06 10:52   ` Dietmar Eggemann
@ 2016-06-06 12:44     ` Vincent Guittot
  0 siblings, 0 replies; 9+ messages in thread
From: Vincent Guittot @ 2016-06-06 12:44 UTC (permalink / raw)
  To: Dietmar Eggemann
  Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, Paul Turner,
	Yuyang Du, Benjamin Segall, Morten Rasmussen

On 6 June 2016 at 12:52, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
> On 24/05/16 10:55, Vincent Guittot wrote:
>
> [...]
>
>> +/* Take into account the change of the utilization of a child task group */
>> +static void update_tg_cfs_util(struct sched_entity *se, int blocked)
>> +{
>> +     int delta;
>> +     struct cfs_rq *cfs_rq;
>> +     long update_util_avg;
>> +     long last_update_time;
>> +     long old_util_avg;
>> +
>> +
>> +     /*
>> +      * update_blocked_average will call this function for root cfs_rq
>> +      * whose se is null. In this case just return
>> +      */
>> +     if (!se)
>> +             return;
>> +
>> +     if (entity_is_task(se))
>> +             return 0;
>
> void function

yes, I have seen that and corrected it in the next version that I'm preparing

>
>> +
>> +     /* Get sched_entity of cfs rq */
>
> You mean /* Get cfs_rq owned by this task group */?

yes

>
>> +     cfs_rq = group_cfs_rq(se);
>> +
>> +     update_util_avg = cfs_rq->diff_util_avg;
>> +
>> +     if (!update_util_avg)
>> +             return 0;
>
> Couldn't you not just get rid of long update_util_avg and only use
> cfs_rq->diff_util_avg here (clearing pending changes after you set
> se->avg.util_avg)?

yes, probably.
Originally, I did that to prevent simultaneous use of the value, but
it's not needed AFAICT now
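
Something like this, for the relevant lines of update_tg_cfs_util() (a rough,
untested sketch):

	if (!cfs_rq->diff_util_avg)
		return;

	/* Apply the pending delta to the group se ... */
	old_util_avg = se->avg.util_avg;
	se->avg.util_avg = max_t(long, se->avg.util_avg + cfs_rq->diff_util_avg, 0);
	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;

	/* ... and only clear the pending changes once they have been applied */
	cfs_rq->diff_util_avg = 0;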

>
>> +
>> +     /* Clear pending changes */
>> +     cfs_rq->diff_util_avg = 0;
>> +
>> +     /* Add changes in sched_entity utilization */
>> +     old_util_avg = se->avg.util_avg;
>> +     se->avg.util_avg = max_t(long, se->avg.util_avg + update_util_avg, 0);
>> +     se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
>> +
>> +     /* Get parent cfs_rq */
>> +     cfs_rq = cfs_rq_of(se);
>> +
>> +     if (blocked) {
>> +             /*
>> +              * blocked utilization has to be synchronized with its parent
>> +              * cfs_rq's timestamp
>> +              */
>
> We don't have stand-alone blocked utilization any more although the
> function is still called update_blocked_averages(). Do you need this for
> cpu's going through idle periods?

yes, blocked load/utilization has been merged with the runnable ones, but
they are still present and have to be updated for idle CPUs

> It's also necessary because there could be other se's which could have
> been [en|de]queued at/from this cfs_rq so its last_update_time value is
> more recent?

yes, the parent can have been updated because of other sched_entities,
so we have to align the timestamps before reflecting the change

>
> [...]

