From: Wanpeng Li
Date: Mon, 19 Sep 2016 11:19:17 +0800
Subject: Re: [PATCH 4/7 v3] sched: propagate load during synchronous attach/detach
To: Vincent Guittot
Cc: Peter Zijlstra, Ingo Molnar, linux-kernel@vger.kernel.org, Yuyang Du,
    Morten Rasmussen, linaro-kernel@lists.linaro.org, Dietmar Eggemann,
    Paul Turner, Benjamin Segall
In-Reply-To: <1473666472-13749-5-git-send-email-vincent.guittot@linaro.org>
References: <1473666472-13749-1-git-send-email-vincent.guittot@linaro.org>
 <1473666472-13749-5-git-send-email-vincent.guittot@linaro.org>

2016-09-12 15:47 GMT+08:00 Vincent Guittot <vincent.guittot@linaro.org>:
> When a task moves from/to a cfs_rq, we set a flag which is then used to
> propagate the change at the parent level (sched_entity and cfs_rq)
> during the next update. If the cfs_rq is throttled, the flag will stay
> pending until the cfs_rq is unthrottled.
>
> For propagating the utilization, we copy the utilization of the child
> cfs_rq to the sched_entity.
>
> For propagating the load, we have to take into account the load of the
> whole task group in order to evaluate the load of the sched_entity.
> Similarly to what was done before the rewrite of PELT, we add a
> correction factor in case the task group's load is less than its share,
> so that it contributes the same load as a task of equal weight.
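
Just to check that I follow the correction term (no objection to the
patch): assuming the default tg->shares of 1024 and a group whose only
load is a single task running ~50% of the time, the gcfs_rq load and
tg_load are both ~512, so the plain scaling alone would give

    512 * 1024 / 513 ~= 1022

i.e. almost the full group weight, while multiplying back by
tg_load / shares brings it down to ~512, the load a standalone task of
equal weight would contribute. A stand-alone sketch of the same
arithmetic is at the end of this mail.
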
>
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
>  kernel/sched/fair.c  | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  kernel/sched/sched.h |   1 +
>  2 files changed, 170 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 0aa1d7d..e4015f6 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3017,6 +3017,132 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
>          }
>  }
>
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> +/* Take into account change of utilization of a child task group */
> +static inline void
> +update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +{
> +        struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> +        long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
> +
> +        /* Nothing to update */
> +        if (!delta)
> +                return;
> +
> +        /* Set new sched_entity's utilizaton */

s/utilizaton/utilization

> +        se->avg.util_avg = gcfs_rq->avg.util_avg;
> +        se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
> +
> +        /* Update parent cfs_rq utilization */
> +        cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg + delta, 0);
> +        cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
> +}
> +
> +/* Take into account change of load of a child task group */
> +static inline void
> +update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +{
> +        struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> +        long delta, load = gcfs_rq->avg.load_avg;
> +
> +        /* If the load of group cfs_rq is null, the load of the
> +         * sched_entity will also be null so we can skip the formula
> +         */
> +        if (load) {
> +                long tg_load;
> +
> +                /* Get tg's load and ensure tg_load > 0 */
> +                tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;
> +
> +                /* Ensure tg_load >= load and updated with current load*/
> +                tg_load -= gcfs_rq->tg_load_avg_contrib;
> +                tg_load += load;
> +
> +                /* scale gcfs_rq's load into tg's shares*/
> +                load *= scale_load_down(gcfs_rq->tg->shares);
> +                load /= tg_load;
> +
> +                /*
> +                 * we need to compute a correction term in the case that the
> +                 * task group is consuming <1 cpu so that we would contribute
> +                 * the same load as a task of equal weight.
> +                 */
> +                if (tg_load < scale_load_down(gcfs_rq->tg->shares)) {
> +                        load *= tg_load;
> +                        load /= scale_load_down(gcfs_rq->tg->shares);
> +                }
> +        }
> +
> +        delta = load - se->avg.load_avg;
> +
> +        /* Nothing to update */
> +        if (!delta)
> +                return;
> +
> +        /* Set new sched_entity's load */
> +        se->avg.load_avg = load;
> +        se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;
> +
> +        /* Update parent cfs_rq load */
> +        cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg + delta, 0);
> +        cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;
> +}
> +
> +static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq)
> +{
> +        /* set cfs_rq's flag */
> +        cfs_rq->propagate_avg = 1;
> +}
> +
> +static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se)
> +{
> +        /* Get my cfs_rq */
> +        struct cfs_rq *cfs_rq = group_cfs_rq(se);
> +
> +        /* Nothing to propagate */
> +        if (!cfs_rq->propagate_avg)
> +                return 0;
> +
> +        /* Clear my cfs_rq's flag */
> +        cfs_rq->propagate_avg = 0;
> +
> +        return 1;
> +}
> +
> +/* Update task and its cfs_rq load average */
> +static inline int propagate_entity_load_avg(struct sched_entity *se)
> +{
> +        struct cfs_rq *cfs_rq;
> +
> +        if (entity_is_task(se))
> +                return 0;
> +
> +        if (!test_and_clear_tg_cfs_propagate(se))
> +                return 0;
> +
> +        /* Get parent cfs_rq */
> +        cfs_rq = cfs_rq_of(se);
> +
> +        /* Propagate to parent */
> +        set_tg_cfs_propagate(cfs_rq);
> +
> +        /* Update utilization */
> +        update_tg_cfs_util(cfs_rq, se);
> +
> +        /* Update load */
> +        update_tg_cfs_load(cfs_rq, se);
> +
> +        return 1;
> +}
> +#else
> +static inline int propagate_entity_load_avg(struct sched_entity *se)
> +{
> +        return 0;
> +}
> +
> +static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
> +#endif
> +
>  /*
>   * Unsigned subtract and clamp on underflow.
>   *
> @@ -3093,6 +3219,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg,
>          u64 now = cfs_rq_clock_task(cfs_rq);
>          struct rq *rq = rq_of(cfs_rq);
>          int cpu = cpu_of(rq);
> +        int decayed;
>
>          /*
>           * Track task load average for carrying it to new CPU after migrated, and
> @@ -3103,7 +3230,11 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg,
>          __update_load_avg(now, cpu, &se->avg,
>                            se->on_rq * scale_load_down(se->load.weight),
>                            cfs_rq->curr == se, NULL);
>
> -        if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
> +        decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
> +
> +        decayed |= propagate_entity_load_avg(se);
> +
> +        if (decayed && update_tg)
>                  update_tg_load_avg(cfs_rq, 0);
>  }
>
> @@ -3122,6 +3253,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>          cfs_rq->avg.load_sum += se->avg.load_sum;
>          cfs_rq->avg.util_avg += se->avg.util_avg;
>          cfs_rq->avg.util_sum += se->avg.util_sum;
> +        set_tg_cfs_propagate(cfs_rq);
>
>          cfs_rq_util_change(cfs_rq);
>  }
> @@ -3141,6 +3273,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>          sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
>          sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
>          sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
> +        set_tg_cfs_propagate(cfs_rq);
>
>          cfs_rq_util_change(cfs_rq);
>  }
> @@ -8499,6 +8632,22 @@ static void detach_task_cfs_rq(struct task_struct *p)
>          update_load_avg(se, 0, 0);
>          detach_entity_load_avg(cfs_rq, se);
>          update_tg_load_avg(cfs_rq, false);
> +
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> +        /*
> +         * Propagate the detach across the tg tree to make it visible to the
> +         * root
> +         */
> +        se = se->parent;
> +        for_each_sched_entity(se) {
> +                cfs_rq = cfs_rq_of(se);
> +
> +                if (cfs_rq_throttled(cfs_rq))
> +                        break;
> +
> +                update_load_avg(se, 1, 0);
> +        }
> +#endif
>  }
>
>  static void attach_entity_cfs_rq(struct sched_entity *se)
> @@ -8517,6 +8666,22 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
>          update_load_avg(se, 0, !sched_feat(ATTACH_AGE_LOAD));
>          attach_entity_load_avg(cfs_rq, se);
>          update_tg_load_avg(cfs_rq, false);
> +
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> +        /*
> +         * Propagate the attach across the tg tree to make it visible to the
> +         * root
> +         */
> +        se = se->parent;
> +        for_each_sched_entity(se) {
> +                cfs_rq = cfs_rq_of(se);
> +
> +                if (cfs_rq_throttled(cfs_rq))
> +                        break;
> +
> +                update_load_avg(se, 1, 0);
> +        }
> +#endif
>  }
>
>  static void attach_task_cfs_rq(struct task_struct *p)
> @@ -8578,6 +8743,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
>          cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
>  #endif
>  #ifdef CONFIG_SMP
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> +        cfs_rq->propagate_avg = 0;
> +#endif
>          atomic_long_set(&cfs_rq->removed_load_avg, 0);
>          atomic_long_set(&cfs_rq->removed_util_avg, 0);
>  #endif
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 483616a..0517a9e 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -397,6 +397,7 @@ struct cfs_rq {
>          unsigned long runnable_load_avg;
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>          unsigned long tg_load_avg_contrib;
> +        unsigned long propagate_avg;
>  #endif
>          atomic_long_t removed_load_avg, removed_util_avg;
>  #ifndef CONFIG_64BIT
> --
> 1.9.1
>
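
For completeness, below is the small user-space sketch I used to
convince myself of the numbers above. It only mirrors the integer
arithmetic of update_tg_cfs_load() -- the tg_load bookkeeping is
collapsed into a single argument and 1024 is assumed as the default
tg->shares -- so it is just an illustration, not the kernel path:

#include <stdio.h>

#define DEFAULT_SHARES  1024    /* assumed default tg->shares */

/* Same scaling and correction as update_tg_cfs_load(), on plain longs */
static long group_se_load(long gcfs_load, long tg_load, long shares)
{
        long load = gcfs_load;

        if (!load)
                return 0;

        /* keep the divisor > 0, like the +1 in the patch */
        tg_load += 1;

        /* scale the child cfs_rq load into the group's shares */
        load *= shares;
        load /= tg_load;

        /* correction: the group consumes less than its full share */
        if (tg_load < shares) {
                load *= tg_load;
                load /= shares;
        }

        return load;
}

int main(void)
{
        /* one task running ~50% of the time, group otherwise idle */
        printf("half-busy group: %ld\n",
               group_se_load(512, 512, DEFAULT_SHARES));
        /* group keeping two CPUs fully busy */
        printf("busy group:      %ld\n",
               group_se_load(2048, 2048, DEFAULT_SHARES));
        return 0;
}

This prints 511 for the half-busy case and 1023 (roughly the full
shares) for the fully busy one, which matches what the changelog
describes.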