From: Wanpeng Li <kernellwp@gmail.com>
To: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Yuyang Du <yuyang.du@intel.com>,
	Morten Rasmussen <Morten.Rasmussen@arm.com>,
	"linaro-kernel@lists.linaro.org" <linaro-kernel@lists.linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Paul Turner <pjt@google.com>,
	Benjamin Segall <bsegall@google.com>
Subject: Re: [PATCH 4/7 v3] sched: propagate load during synchronous attach/detach
Date: Mon, 19 Sep 2016 11:19:17 +0800	[thread overview]
Message-ID: <CANRm+Cxc+xmGksgo4mB+ygNqEAXCPib3jFsP+s6+JpupCjmtkA@mail.gmail.com> (raw)
In-Reply-To: <1473666472-13749-5-git-send-email-vincent.guittot@linaro.org>

2016-09-12 15:47 GMT+08:00 Vincent Guittot <vincent.guittot@linaro.org>:
> When a task moves from/to a cfs_rq, we set a flag which is then used to
> propagate the change at parent level (sched_entity and cfs_rq) during
> next update. If the cfs_rq is throttled, the flag will stay pending until
> the cfs_rq is unthrottled.
>
> For propagating the utilization, we copy the utilization of child cfs_rq to
> the sched_entity.
>
> For propagating the load, we have to take into account the load of the
> whole task group in order to evaluate the load of the sched_entity.
> Similarly to what was done before the rewrite of PELT, we add a correction
> factor in case the task group's load is less than its share so it will
> contribute the same load as a task of equal weight.
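
A quick sanity check of that correction term with made-up numbers (none
of these come from the patch): with tg->shares scaled down to 1024 and
the group running only on this cfs_rq (child load_avg = tg->load_avg =
tg_load_avg_contrib = 300), tg_load works out to 301, the first scaling
gives 300 * 1024 / 301 ~= 1020, and the correction brings it back to
1020 * 301 / 1024 ~= 299, so the group entity indeed contributes about
the same load as a task of equal weight.
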
>
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
>  kernel/sched/fair.c  | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  kernel/sched/sched.h |   1 +
>  2 files changed, 170 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 0aa1d7d..e4015f6 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3017,6 +3017,132 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
>         }
>  }
>
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> +/* Take into account change of utilization of a child task group */
> +static inline void
> +update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +{
> +       struct cfs_rq *gcfs_rq =  group_cfs_rq(se);
> +       long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
> +
> +       /* Nothing to update */
> +       if (!delta)
> +               return;
> +
> +       /* Set new sched_entity's utilizaton */

s/utilizaton/utilization

> +       se->avg.util_avg = gcfs_rq->avg.util_avg;
> +       se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
> +
> +       /* Update parent cfs_rq utilization */
> +       cfs_rq->avg.util_avg =  max_t(long, cfs_rq->avg.util_avg + delta, 0);
> +       cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
> +}
> +
> +/* Take into account change of load of a child task group */
> +static inline void
> +update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +{
> +       struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> +       long delta, load = gcfs_rq->avg.load_avg;
> +
> +       /* If the load of group cfs_rq is null, the load of the
> +        * sched_entity will also be null so we can skip the formula
> +        */
> +       if (load) {
> +               long tg_load;
> +
> +               /* Get tg's load and ensure tg_load > 0 */
> +               tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;
> +
> +               /* Ensure tg_load >= load and updated with current load*/
> +               tg_load -= gcfs_rq->tg_load_avg_contrib;
> +               tg_load += load;
> +
> +               /* scale gcfs_rq's load into tg's shares*/
> +               load *= scale_load_down(gcfs_rq->tg->shares);
> +               load /= tg_load;
> +
> +               /*
> +                * we need to compute a correction term in the case that the
> +                * task group is consuming <1 cpu so that we would contribute
> +                * the same load as a task of equal weight.
> +               */
> +               if (tg_load < scale_load_down(gcfs_rq->tg->shares)) {
> +                       load *= tg_load;
> +                       load /= scale_load_down(gcfs_rq->tg->shares);
> +               }
> +       }
> +
> +       delta = load - se->avg.load_avg;
> +
> +       /* Nothing to update */
> +       if (!delta)
> +               return;
> +
> +       /* Set new sched_entity's load */
> +       se->avg.load_avg = load;
> +       se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;
> +
> +       /* Update parent cfs_rq load */
> +       cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg + delta, 0);
> +       cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;
> +}
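
Just to double-check the scaling above, I traced it with a small
standalone userspace sketch; scale_group_load() and the fixed 1024
shares value are my own stand-ins for illustration, not anything the
patch adds:

    #include <stdio.h>

    /* made-up stand-in for scale_load_down(tg->shares) */
    #define TG_SHARES 1024L

    /*
     * Same arithmetic as update_tg_cfs_load(): scale the child cfs_rq's
     * load_avg into the group's shares, with the <1 CPU correction.
     */
    static long scale_group_load(long gcfs_load, long tg_load_avg,
                                 long tg_contrib)
    {
        long load = gcfs_load;
        long tg_load;

        if (!load)
            return 0;

        tg_load = tg_load_avg + 1;   /* keep tg_load > 0 */
        tg_load -= tg_contrib;       /* drop the stale contribution ... */
        tg_load += load;             /* ... and add the current child load */

        load *= TG_SHARES;
        load /= tg_load;

        /* group consumes less than one CPU: don't inflate its load */
        if (tg_load < TG_SHARES) {
            load *= tg_load;
            load /= TG_SHARES;
        }
        return load;
    }

    int main(void)
    {
        /* group runs only on this cfs_rq: se gets ~the child load (511) */
        printf("%ld\n", scale_group_load(512, 512, 512));
        /* group spread over 4 CPUs at 512 each: ~shares / 4 (255) */
        printf("%ld\n", scale_group_load(512, 2048, 512));
        return 0;
    }

So a group using less than one CPU contributes about the same load as a
task of equal weight, while a busier group has its shares split across
the per-CPU group entities, which matches the changelog.
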
> +
> +static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq)
> +{
> +       /* set cfs_rq's flag */
> +       cfs_rq->propagate_avg = 1;
> +}
> +
> +static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se)
> +{
> +       /* Get my cfs_rq */
> +       struct cfs_rq *cfs_rq = group_cfs_rq(se);
> +
> +       /* Nothing to propagate */
> +       if (!cfs_rq->propagate_avg)
> +               return 0;
> +
> +       /* Clear my cfs_rq's flag */
> +       cfs_rq->propagate_avg = 0;
> +
> +       return 1;
> +}
> +
> +/* Update task and its cfs_rq load average */
> +static inline int propagate_entity_load_avg(struct sched_entity *se)
> +{
> +       struct cfs_rq *cfs_rq;
> +
> +       if (entity_is_task(se))
> +               return 0;
> +
> +       if (!test_and_clear_tg_cfs_propagate(se))
> +               return 0;
> +
> +       /* Get parent cfs_rq */
> +       cfs_rq = cfs_rq_of(se);
> +
> +       /* Propagate to parent */
> +       set_tg_cfs_propagate(cfs_rq);
> +
> +       /* Update utilization */
> +       update_tg_cfs_util(cfs_rq, se);
> +
> +       /* Update load */
> +       update_tg_cfs_load(cfs_rq, se);
> +
> +       return 1;
> +}
> +#else
> +static inline int propagate_entity_load_avg(struct sched_entity *se)
> +{
> +       return 0;
> +}
> +
> +static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
> +#endif
> +
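
The way propagate_avg ripples one level per update took me a moment to
see, so here is a toy userspace model of it; the array and the helper
name are mine, purely to illustrate the mechanism, not kernel code:

    #include <stdio.h>
    #include <stdbool.h>

    #define LEVELS 3   /* leaf group -> parent group -> root cfs_rq */

    static bool propagate_avg[LEVELS];

    /*
     * Rough model of propagate_entity_load_avg() for a group entity at
     * level i: test-and-clear the child cfs_rq's flag, then set the flag
     * on the parent cfs_rq one level up.
     */
    static void update_level(int i)
    {
        if (!propagate_avg[i])
            return;
        propagate_avg[i] = false;
        if (i + 1 < LEVELS) {
            propagate_avg[i + 1] = true;
            printf("level %d propagated to level %d\n", i, i + 1);
        } else {
            printf("level %d reached the root\n", i);
        }
    }

    int main(void)
    {
        propagate_avg[0] = true;    /* attach/detach on the leaf cfs_rq */

        /* the for_each_sched_entity() walk in attach/detach below */
        for (int i = 0; i < LEVELS; i++)
            update_level(i);
        return 0;
    }

If an intermediate cfs_rq is throttled, the walk further down breaks
there and the pending flag is simply left for a later update_load_avg()
once the group is unthrottled, which is the "stay pending" behaviour
described in the changelog.
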
>  /*
>   * Unsigned subtract and clamp on underflow.
>   *
> @@ -3093,6 +3219,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg,
>         u64 now = cfs_rq_clock_task(cfs_rq);
>         struct rq *rq = rq_of(cfs_rq);
>         int cpu = cpu_of(rq);
> +       int decayed;
>
>         /*
>          * Track task load average for carrying it to new CPU after migrated, and
> @@ -3103,7 +3230,11 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg,
>                           se->on_rq * scale_load_down(se->load.weight),
>                           cfs_rq->curr == se, NULL);
>
> -       if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
> +       decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
> +
> +       decayed |= propagate_entity_load_avg(se);
> +
> +       if (decayed && update_tg)
>                 update_tg_load_avg(cfs_rq, 0);
>  }
>
> @@ -3122,6 +3253,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>         cfs_rq->avg.load_sum += se->avg.load_sum;
>         cfs_rq->avg.util_avg += se->avg.util_avg;
>         cfs_rq->avg.util_sum += se->avg.util_sum;
> +       set_tg_cfs_propagate(cfs_rq);
>
>         cfs_rq_util_change(cfs_rq);
>  }
> @@ -3141,6 +3273,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>         sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
>         sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
>         sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
> +       set_tg_cfs_propagate(cfs_rq);
>
>         cfs_rq_util_change(cfs_rq);
>  }
> @@ -8499,6 +8632,22 @@ static void detach_task_cfs_rq(struct task_struct *p)
>         update_load_avg(se, 0, 0);
>         detach_entity_load_avg(cfs_rq, se);
>         update_tg_load_avg(cfs_rq, false);
> +
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> +       /*
> +        * Propagate the detach across the tg tree to make it visible to the
> +        * root
> +        */
> +       se = se->parent;
> +       for_each_sched_entity(se) {
> +               cfs_rq = cfs_rq_of(se);
> +
> +               if (cfs_rq_throttled(cfs_rq))
> +                       break;
> +
> +               update_load_avg(se, 1, 0);
> +       }
> +#endif
>  }
>
>  static void attach_entity_cfs_rq(struct sched_entity *se)
> @@ -8517,6 +8666,22 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
>         update_load_avg(se, 0, !sched_feat(ATTACH_AGE_LOAD));
>         attach_entity_load_avg(cfs_rq, se);
>         update_tg_load_avg(cfs_rq, false);
> +
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> +       /*
> +        * Propagate the attach across the tg tree to make it visible to the
> +        * root
> +        */
> +       se = se->parent;
> +       for_each_sched_entity(se) {
> +               cfs_rq = cfs_rq_of(se);
> +
> +               if (cfs_rq_throttled(cfs_rq))
> +                       break;
> +
> +               update_load_avg(se, 1, 0);
> +       }
> +#endif
>  }
>
>  static void attach_task_cfs_rq(struct task_struct *p)
> @@ -8578,6 +8743,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
>         cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
>  #endif
>  #ifdef CONFIG_SMP
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> +       cfs_rq->propagate_avg = 0;
> +#endif
>         atomic_long_set(&cfs_rq->removed_load_avg, 0);
>         atomic_long_set(&cfs_rq->removed_util_avg, 0);
>  #endif
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 483616a..0517a9e 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -397,6 +397,7 @@ struct cfs_rq {
>         unsigned long runnable_load_avg;
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>         unsigned long tg_load_avg_contrib;
> +       unsigned long propagate_avg;
>  #endif
>         atomic_long_t removed_load_avg, removed_util_avg;
>  #ifndef CONFIG_64BIT
> --
> 1.9.1
>


Thread overview: 41+ messages
2016-09-12  7:47 [PATCH 0/7 v3] sched: reflect sched_entity move into task_group's load Vincent Guittot
2016-09-12  7:47 ` [PATCH 1/7 v3] sched: factorize attach entity Vincent Guittot
2016-09-12  7:47 ` [PATCH 2/7 v3] sched: fix hierarchical order in rq->leaf_cfs_rq_list Vincent Guittot
2016-09-21 10:14   ` Dietmar Eggemann
2016-09-21 12:34     ` Vincent Guittot
2016-09-21 17:25       ` Dietmar Eggemann
2016-09-21 18:02         ` Vincent Guittot
2016-09-12  7:47 ` [PATCH 3/7 v3] sched: factorize PELT update Vincent Guittot
2016-09-15 13:09   ` Peter Zijlstra
2016-09-15 13:30     ` Vincent Guittot
2016-09-12  7:47 ` [PATCH 4/7 v3] sched: propagate load during synchronous attach/detach Vincent Guittot
2016-09-15 12:55   ` Peter Zijlstra
2016-09-15 13:01     ` Vincent Guittot
2016-09-15 12:59   ` Peter Zijlstra
2016-09-15 13:11     ` Vincent Guittot
2016-09-15 13:11   ` Dietmar Eggemann
2016-09-15 14:31     ` Vincent Guittot
2016-09-15 17:20       ` Dietmar Eggemann
2016-09-15 15:14     ` Peter Zijlstra
2016-09-15 17:36       ` Dietmar Eggemann
2016-09-15 17:54         ` Peter Zijlstra
2016-09-15 14:43   ` Peter Zijlstra
2016-09-15 14:51     ` Vincent Guittot
2016-09-19  3:19   ` Wanpeng Li [this message]
2016-09-12  7:47 ` [PATCH 5/7 v3] sched: propagate asynchrous detach Vincent Guittot
2016-09-12  7:47 ` [PATCH 6/7 v3] sched: fix task group initialization Vincent Guittot
2016-09-12  7:47 ` [PATCH 7/7 v3] sched: fix wrong utilization accounting when switching to fair class Vincent Guittot
2016-09-15 13:18   ` Peter Zijlstra
2016-09-15 15:36     ` Vincent Guittot
2016-09-16 12:16       ` Peter Zijlstra
2016-09-16 14:23         ` Vincent Guittot
2016-09-20 11:54           ` Peter Zijlstra
2016-09-20 13:06             ` Vincent Guittot
2016-09-22 12:25               ` Peter Zijlstra
2016-09-26 14:53                 ` Peter Zijlstra
2016-09-20 16:59             ` bsegall
2016-09-22  8:33               ` Peter Zijlstra
2016-09-22 17:10                 ` bsegall
2016-09-16 10:51   ` Peter Zijlstra
2016-09-16 12:45     ` Vincent Guittot
2016-09-30 12:01   ` [tip:sched/core] sched/core: Fix incorrect " tip-bot for Vincent Guittot
