* [PATCH v2] sched/pelt: sync util/runnable_sum with PELT window when propagating
@ 2020-05-06 15:53 Vincent Guittot
  2020-05-19 10:28 ` Dietmar Eggemann
  2020-05-19 18:44 ` [tip: sched/core] sched/pelt: Sync " tip-bot2 for Vincent Guittot
  0 siblings, 2 replies; 5+ messages in thread
From: Vincent Guittot @ 2020-05-06 15:53 UTC (permalink / raw)
  To: mingo, peterz, juri.lelli, dietmar.eggemann, rostedt, bsegall,
	mgorman, linux-kernel
  Cc: Vincent Guittot

update_tg_cfs_*() propagate the impact of the attach/detach of an entity
down into the cfs_rq hierarchy and must stay in sync with the current PELT
window.

Even if we can't sync the child cfs_rq with its group se, we can sync the
group se and its parent cfs_rq with the current position in the PELT window.
In fact, we must keep them in sync in order to also stay synced with the
other entities and group entities that are already attached to the cfs_rq.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---

I have also included update_tg_cfs_load() because there is no reason not to
sync load_avg with load_sum, even if the impact is probably less obvious
there because it's not a simple propagation of the diff.

 kernel/sched/fair.c | 49 +++++++++++++++++++++++++--------------------
 kernel/sched/pelt.c | 24 ++++++++++++++++++++++
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 02f323b85b6d..df3923a65162 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3441,52 +3441,46 @@ static inline void
 update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
 	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
+	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
 	/* Nothing to update */
 	if (!delta)
 		return;
 
-	/*
-	 * The relation between sum and avg is:
-	 *
-	 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
-	 *
-	 * however, the PELT windows are not aligned between grq and gse.
-	 */
-
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
-	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+	se->avg.util_sum = se->avg.util_avg * divider;
 
 	/* Update parent cfs_rq utilization */
 	add_positive(&cfs_rq->avg.util_avg, delta);
-	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
 }
 
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
 	long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
+	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
 	/* Nothing to update */
 	if (!delta)
 		return;
 
-	/*
-	 * The relation between sum and avg is:
-	 *
-	 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
-	 *
-	 * however, the PELT windows are not aligned between grq and gse.
-	 */
-
 	/* Set new sched_entity's runnable */
 	se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
-	se->avg.runnable_sum = se->avg.runnable_avg * LOAD_AVG_MAX;
+	se->avg.runnable_sum = se->avg.runnable_avg * divider;
 
 	/* Update parent cfs_rq runnable */
 	add_positive(&cfs_rq->avg.runnable_avg, delta);
-	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * LOAD_AVG_MAX;
+	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
 }
 
 static inline void
@@ -3496,19 +3490,26 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	unsigned long load_avg;
 	u64 load_sum = 0;
 	s64 delta_sum;
+	u32 divider;
 
 	if (!runnable_sum)
 		return;
 
 	gcfs_rq->prop_runnable_sum = 0;
 
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
+	divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+
 	if (runnable_sum >= 0) {
 		/*
 		 * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
 		 * the CPU is saturated running == runnable.
 		 */
 		runnable_sum += se->avg.load_sum;
-		runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+		runnable_sum = min_t(long, runnable_sum, divider);
 	} else {
 		/*
 		 * Estimate the new unweighted runnable_sum of the gcfs_rq by
@@ -3533,7 +3534,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	runnable_sum = max(runnable_sum, running_sum);
 
 	load_sum = (s64)se_weight(se) * runnable_sum;
-	load_avg = div_s64(load_sum, LOAD_AVG_MAX);
+	load_avg = div_s64(load_sum, divider);
 
 	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
 	delta_avg = load_avg - se->avg.load_avg;
@@ -3697,6 +3698,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  */
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
 	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
 	/*
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index b647d04d9c8b..1feff80e7e45 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
 	return 1;
 }
 
+/*
+ * When syncing *_avg with *_sum, we must take into account the current
+ * position in the PELT segment otherwise the remaining part of the segment
+ * will be considered as idle time whereas it's not yet elapsed and this will
+ * generate unwanted oscillation in the range [1002..1024[.
+ *
+ * The max value of *_sum varies with the position in the time segment and is
+ * equals to :
+ *
+ *   LOAD_AVG_MAX*y + sa->period_contrib
+ *
+ * which can be simplified into:
+ *
+ *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+ *
+ * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
+ *
+ * The same care must be taken when a sched entity is added, updated or
+ * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
+ * and the cfs rq, to which they are attached, have the same position in the
+ * time segment because they use the same clock. This means that we can use
+ * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
+ * if it's more convenient.
+ */
 static __always_inline void
 ___update_load_avg(struct sched_avg *sa, unsigned long load)
 {
-- 
2.17.1


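To put numbers on the new comment: a fully running entity has *_avg == 1024
and its *_sum can at most reach the divider, i.e. LOAD_AVG_MAX - 1024 +
period_contrib. The small userspace sketch below (not kernel code; it only
assumes LOAD_AVG_MAX = 47742 and the 1024us PELT segment described above)
shows that an avg re-derived while ignoring the current position in the
segment wanders in [1002..1024[, whereas one scaled by the divider
round-trips exactly:

#include <stdio.h>

#define LOAD_AVG_MAX	47742

int main(void)
{
	unsigned int contrib, avg = 1024;	/* fully utilized entity */

	/* period_contrib is always < 1024 in the kernel */
	for (contrib = 0; contrib < 1024; contrib += 256) {
		/* max reachable *_sum at this position in the segment */
		unsigned int divider = LOAD_AVG_MAX - 1024 + contrib;

		/*
		 * A sum scaled by the divider round-trips exactly through
		 * "avg = sum / divider" as done in ___update_load_avg() ...
		 */
		unsigned int synced = (avg * divider) / divider;
		/*
		 * ... whereas dividing the true maximum sum by LOAD_AVG_MAX
		 * treats the not-yet-elapsed part of the segment as idle
		 * time, which gives the [1002..1024[ oscillation mentioned
		 * in the comment.
		 */
		unsigned int stale = (avg * divider) / LOAD_AVG_MAX;

		printf("contrib=%4u synced=%u stale=%u\n",
		       contrib, synced, stale);
	}
	return 0;
}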

* Re: [PATCH v2] sched/pelt: sync util/runnable_sum with PELT window when propagating
  2020-05-06 15:53 [PATCH v2] sched/pelt: sync util/runnable_sum with PELT window when propagating Vincent Guittot
@ 2020-05-19 10:28 ` Dietmar Eggemann
  2020-05-19 15:41   ` Vincent Guittot
  2020-05-19 18:44 ` [tip: sched/core] sched/pelt: Sync " tip-bot2 for Vincent Guittot
  1 sibling, 1 reply; 5+ messages in thread
From: Dietmar Eggemann @ 2020-05-19 10:28 UTC (permalink / raw)
  To: Vincent Guittot, mingo, peterz, juri.lelli, rostedt, bsegall,
	mgorman, linux-kernel

On 06/05/2020 17:53, Vincent Guittot wrote:

[...]

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 02f323b85b6d..df3923a65162 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3441,52 +3441,46 @@ static inline void
>  update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
>  {
>  	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
> +	/*
> +	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
> +	 * See ___update_load_avg() for details.
> +	 */
> +	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;

Why not do the assignment (like in update_tg_cfs_load()) after the
next condition? Same question for update_tg_cfs_runnable().

[...]

>  static inline void
>  update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
>  {
>  	long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
> +	/*
> +	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
> +	 * See ___update_load_avg() for details.
> +	 */
> +	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;

We now have 6 assignments like this in fair.c and 1 in pelt.c. Could
this not be refactored by using something like this in pelt.h:

+static inline u32 get_divider(struct sched_avg *avg)
+{
+       return LOAD_AVG_MAX - 1024 + avg->period_contrib;
+}

[...]

> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
> index b647d04d9c8b..1feff80e7e45 100644
> --- a/kernel/sched/pelt.c
> +++ b/kernel/sched/pelt.c
> @@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
>  	return 1;
>  }
>  
> +/*
> + * When syncing *_avg with *_sum, we must take into account the current
> + * position in the PELT segment otherwise the remaining part of the segment
> + * will be considered as idle time whereas it's not yet elapsed and this will
> + * generate unwanted oscillation in the range [1002..1024[.
> + *
> + * The max value of *_sum varies with the position in the time segment and is
> + * equals to :
> + *
> + *   LOAD_AVG_MAX*y + sa->period_contrib
> + *
> + * which can be simplified into:
> + *
> + *   LOAD_AVG_MAX - 1024 + sa->period_contrib
> + *
> + * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024

Isn't this rather '~' instead of '==', even for y^32 = 0.5 ?

47742 * 0.5^(1/32) ~ 47742 - 1024


Apart from that, LGTM

Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>

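For illustration, a rough sketch of the helper suggested here, applied to one
call site from the patch; this is only the idea as proposed in this mail (the
helper name and placement are assumptions), not the code under review:

/* kernel/sched/pelt.h (sketch) */
static inline u32 get_divider(struct sched_avg *avg)
{
	return LOAD_AVG_MAX - 1024 + avg->period_contrib;
}

/* kernel/sched/fair.c (sketch), one of the 6 call sites */
static inline void
update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
	/*
	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
	 * See ___update_load_avg() for details.
	 */
	u32 divider = get_divider(&cfs_rq->avg);

	/* Nothing to update */
	if (!delta)
		return;

	/* Set new sched_entity's utilization */
	se->avg.util_avg = gcfs_rq->avg.util_avg;
	se->avg.util_sum = se->avg.util_avg * divider;

	/* Update parent cfs_rq utilization */
	add_positive(&cfs_rq->avg.util_avg, delta);
	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
}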

* Re: [PATCH v2] sched/pelt: sync util/runnable_sum with PELT window when propagating
  2020-05-19 10:28 ` Dietmar Eggemann
@ 2020-05-19 15:41   ` Vincent Guittot
  2020-05-20 10:29     ` Dietmar Eggemann
  0 siblings, 1 reply; 5+ messages in thread
From: Vincent Guittot @ 2020-05-19 15:41 UTC (permalink / raw)
  To: Dietmar Eggemann
  Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Steven Rostedt,
	Ben Segall, Mel Gorman, linux-kernel

On Tue, 19 May 2020 at 12:28, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
>
> On 06/05/2020 17:53, Vincent Guittot wrote:
>
> [...]
>
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 02f323b85b6d..df3923a65162 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -3441,52 +3441,46 @@ static inline void
> >  update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
> >  {
> >       long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
> > +     /*
> > +      * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
> > +      * See ___update_load_avg() for details.
> > +      */
> > +     u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
>
> Why not do the assignment (like in update_tg_cfs_load()) after the
> next condition? Same question for update_tg_cfs_runnable().

In fact, I expect the compiler to be smart enough to do this in the best place.

>
> [...]
>
> >  static inline void
> >  update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
> >  {
> >       long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
> > +     /*
> > +      * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
> > +      * See ___update_load_avg() for details.
> > +      */
> > +     u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
>
> > We now have 6 assignments like this in fair.c and 1 in pelt.c. Could
> > this not be refactored by using something like this in pelt.h:
>
> +static inline u32 get_divider(struct sched_avg *avg)

That's a good point.
I would add 'pelt' to the name, e.g.:
static inline u32 get_pelt_divider(struct sched_avg *avg)

> +{
> +       return LOAD_AVG_MAX - 1024 + avg->period_contrib;
> +}
>
> [...]
>
> > diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
> > index b647d04d9c8b..1feff80e7e45 100644
> > --- a/kernel/sched/pelt.c
> > +++ b/kernel/sched/pelt.c
> > @@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
> >       return 1;
> >  }
> >
> > +/*
> > + * When syncing *_avg with *_sum, we must take into account the current
> > + * position in the PELT segment otherwise the remaining part of the segment
> > + * will be considered as idle time whereas it's not yet elapsed and this will
> > + * generate unwanted oscillation in the range [1002..1024[.
> > + *
> > + * The max value of *_sum varies with the position in the time segment and is
> > + * equals to :
> > + *
> > + *   LOAD_AVG_MAX*y + sa->period_contrib
> > + *
> > + * which can be simplified into:
> > + *
> > + *   LOAD_AVG_MAX - 1024 + sa->period_contrib
> > + *
> > + * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
>
> Isn't this rather '~' instead of '==', even for y^32 = 0.5 ?
>
> 47742 * 0.5^(1/32) ~ 47742 - 1024

With integer precision and the runnable_avg_yN_inv array, you've got
exactly 1024

>
>
> Apart from that, LGTM
>
> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>

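Vincent's point about integer precision can be checked with a few lines of
userspace C. Assuming LOAD_AVG_MAX = 47742 and y^1 stored as the 32-bit
fixed-point constant 0xfa83b2da in runnable_avg_yN_inv[] (values as I read
them in kernel/sched/pelt.c), decaying LOAD_AVG_MAX by one period gives
exactly LOAD_AVG_MAX - 1024:

#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX	47742ULL

int main(void)
{
	/* y^1 in 32-bit fixed point, y = 0.5^(1/32) */
	const uint32_t y_inv = 0xfa83b2da;

	/* mimics mul_u64_u32_shr(LOAD_AVG_MAX, y_inv, 32) */
	uint64_t decayed = (LOAD_AVG_MAX * y_inv) >> 32;

	printf("LOAD_AVG_MAX * y    = %llu\n",
	       (unsigned long long)decayed);			/* 46718 */
	printf("LOAD_AVG_MAX - 1024 = %llu\n",
	       (unsigned long long)(LOAD_AVG_MAX - 1024));	/* 46718 */
	return 0;
}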

* [tip: sched/core] sched/pelt: Sync util/runnable_sum with PELT window when propagating
  2020-05-06 15:53 [PATCH v2] sched/pelt: sync util/runnable_sum with PELT window when propagating Vincent Guittot
  2020-05-19 10:28 ` Dietmar Eggemann
@ 2020-05-19 18:44 ` tip-bot2 for Vincent Guittot
  1 sibling, 0 replies; 5+ messages in thread
From: tip-bot2 for Vincent Guittot @ 2020-05-19 18:44 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Vincent Guittot, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     95d685935a2edf209fc68f52494ede4a382a6c2b
Gitweb:        https://git.kernel.org/tip/95d685935a2edf209fc68f52494ede4a382a6c2b
Author:        Vincent Guittot <vincent.guittot@linaro.org>
AuthorDate:    Wed, 06 May 2020 17:53:01 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 19 May 2020 20:34:14 +02:00

sched/pelt: Sync util/runnable_sum with PELT window when propagating

update_tg_cfs_*() propagate the impact of the attach/detach of an entity
down into the cfs_rq hierarchy and must stay in sync with the current PELT
window.

Even if we can't sync the child cfs_rq with its group se, we can sync the
group se and its parent cfs_rq with the current position in the PELT window.
In fact, we must keep them in sync in order to also stay synced with the
other entities and group entities that are already attached to the cfs_rq.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200506155301.14288-1-vincent.guittot@linaro.org
---
 kernel/sched/fair.c | 49 ++++++++++++++++++++++++--------------------
 kernel/sched/pelt.c | 24 ++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4e58686..44b0c8e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3441,52 +3441,46 @@ static inline void
 update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
 	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
+	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
 	/* Nothing to update */
 	if (!delta)
 		return;
 
-	/*
-	 * The relation between sum and avg is:
-	 *
-	 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
-	 *
-	 * however, the PELT windows are not aligned between grq and gse.
-	 */
-
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
-	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+	se->avg.util_sum = se->avg.util_avg * divider;
 
 	/* Update parent cfs_rq utilization */
 	add_positive(&cfs_rq->avg.util_avg, delta);
-	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
 }
 
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
 	long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
+	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
 	/* Nothing to update */
 	if (!delta)
 		return;
 
-	/*
-	 * The relation between sum and avg is:
-	 *
-	 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
-	 *
-	 * however, the PELT windows are not aligned between grq and gse.
-	 */
-
 	/* Set new sched_entity's runnable */
 	se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
-	se->avg.runnable_sum = se->avg.runnable_avg * LOAD_AVG_MAX;
+	se->avg.runnable_sum = se->avg.runnable_avg * divider;
 
 	/* Update parent cfs_rq runnable */
 	add_positive(&cfs_rq->avg.runnable_avg, delta);
-	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * LOAD_AVG_MAX;
+	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
 }
 
 static inline void
@@ -3496,19 +3490,26 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	unsigned long load_avg;
 	u64 load_sum = 0;
 	s64 delta_sum;
+	u32 divider;
 
 	if (!runnable_sum)
 		return;
 
 	gcfs_rq->prop_runnable_sum = 0;
 
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
+	divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+
 	if (runnable_sum >= 0) {
 		/*
 		 * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
 		 * the CPU is saturated running == runnable.
 		 */
 		runnable_sum += se->avg.load_sum;
-		runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+		runnable_sum = min_t(long, runnable_sum, divider);
 	} else {
 		/*
 		 * Estimate the new unweighted runnable_sum of the gcfs_rq by
@@ -3533,7 +3534,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	runnable_sum = max(runnable_sum, running_sum);
 
 	load_sum = (s64)se_weight(se) * runnable_sum;
-	load_avg = div_s64(load_sum, LOAD_AVG_MAX);
+	load_avg = div_s64(load_sum, divider);
 
 	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
 	delta_avg = load_avg - se->avg.load_avg;
@@ -3697,6 +3698,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  */
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
 	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
 	/*
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index b647d04..b4b1ff9 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
 	return 1;
 }
 
+/*
+ * When syncing *_avg with *_sum, we must take into account the current
+ * position in the PELT segment otherwise the remaining part of the segment
+ * will be considered as idle time whereas it's not yet elapsed and this will
+ * generate unwanted oscillation in the range [1002..1024[.
+ *
+ * The max value of *_sum varies with the position in the time segment and is
+ * equals to :
+ *
+ *   LOAD_AVG_MAX*y + sa->period_contrib
+ *
+ * which can be simplified into:
+ *
+ *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+ *
+ * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
+ *
+ * The same care must be taken when a sched entity is added, updated or
+ * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
+ * and the cfs rq, to which they are attached, have the same position in the
+ * time segment because they use the same clock. This means that we can use
+ * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
+ * if it's more convenient.
+ */
 static __always_inline void
 ___update_load_avg(struct sched_avg *sa, unsigned long load)
 {


* Re: [PATCH v2] sched/pelt: sync util/runnable_sum with PELT window when propagating
  2020-05-19 15:41   ` Vincent Guittot
@ 2020-05-20 10:29     ` Dietmar Eggemann
  0 siblings, 0 replies; 5+ messages in thread
From: Dietmar Eggemann @ 2020-05-20 10:29 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Steven Rostedt,
	Ben Segall, Mel Gorman, linux-kernel

On 19/05/2020 17:41, Vincent Guittot wrote:
> On Tue, 19 May 2020 at 12:28, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
>>
>> On 06/05/2020 17:53, Vincent Guittot wrote:

[...]

>>> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
>>> index b647d04d9c8b..1feff80e7e45 100644
>>> --- a/kernel/sched/pelt.c
>>> +++ b/kernel/sched/pelt.c
>>> @@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
>>>       return 1;
>>>  }
>>>
>>> +/*
>>> + * When syncing *_avg with *_sum, we must take into account the current
>>> + * position in the PELT segment otherwise the remaining part of the segment
>>> + * will be considered as idle time whereas it's not yet elapsed and this will
>>> + * generate unwanted oscillation in the range [1002..1024[.
>>> + *
>>> + * The max value of *_sum varies with the position in the time segment and is
>>> + * equals to :
>>> + *
>>> + *   LOAD_AVG_MAX*y + sa->period_contrib
>>> + *
>>> + * which can be simplified into:
>>> + *
>>> + *   LOAD_AVG_MAX - 1024 + sa->period_contrib
>>> + *
>>> + * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
>>
>> Isn't this rather '~' instead of '==', even for y^32 = 0.5 ?
>>
>> 47742 * 0.5^(1/32) ~ 47742 - 1024
> 
> With integer precision and the runnable_avg_yN_inv array, you've got
> exactly 1024

Ah, OK, I forgot about this and that this is related to commit
625ed2bf049d ("sched/cfs: Make util/load_avg more stable").

