All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vincent Guittot <vincent.guittot@linaro.org>
To: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	Morten Rasmussen <morten.rasmussen@foss.arm.com>,
	Brendan Jackman <brendan.jackman@arm.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>
Subject: Re: [PATCH 1/3] sched: Stop nohz stats when decayed
Date: Tue, 6 Feb 2018 15:31:26 +0100	[thread overview]
Message-ID: <CAKfTPtCfN4_GP+XjAjqWQA_G3_3cQ3XzWHRx2zFTpvvgS44uFA@mail.gmail.com> (raw)
In-Reply-To: <352e52af-1f2e-9b87-b94f-c5a4313d8b08@arm.com>

On 6 February 2018 at 15:16, Valentin Schneider
<valentin.schneider@arm.com> wrote:
> Hi Vincent,
>
> On 02/06/2018 08:32 AM, Vincent Guittot wrote:
>> Stopped the periodic update of blocked load when all idle CPUs have fully
>> decayed. We introduce a new nohz.has_blocked that reflect if some idle
>> CPUs has blocked load that have to be periodiccally updated. nohz.has_blocked
>> is set everytime that a Idle CPU can have blocked load and it is then clear
>> when no more blocked load has been detected during an update. We don't need
>> atomic operation but only to make cure of the right ordering when updating
>> nohz.idle_cpus_mask and nohz.has_blocked.
>>
>> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
>> ---
>>  kernel/sched/fair.c  | 94 +++++++++++++++++++++++++++++++++++++++++-----------
>>  kernel/sched/sched.h |  1 +
>>  2 files changed, 75 insertions(+), 20 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 7af1fa9..279f4b2 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -5383,8 +5383,9 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
>>  static struct {
>>       cpumask_var_t idle_cpus_mask;
>>       atomic_t nr_cpus;
>> +     int has_blocked;                /* Idle CPUS has blocked load */
>>       unsigned long next_balance;     /* in jiffy units */
>> -     unsigned long next_stats;
>> +     unsigned long next_blocked;     /* Next update of blocked load in jiffies */
>>  } nohz ____cacheline_aligned;
>>
>>  #endif /* CONFIG_NO_HZ_COMMON */
>> @@ -6951,6 +6952,7 @@ enum fbq_type { regular, remote, all };
>>  #define LBF_DST_PINNED  0x04
>>  #define LBF_SOME_PINNED      0x08
>>  #define LBF_NOHZ_STATS       0x10
>> +#define LBF_NOHZ_AGAIN       0x20
>>
>>  struct lb_env {
>>       struct sched_domain     *sd;
>> @@ -7335,8 +7337,6 @@ static void attach_tasks(struct lb_env *env)
>>       rq_unlock(env->dst_rq, &rf);
>>  }
>>
>> -#ifdef CONFIG_FAIR_GROUP_SCHED
>> -
>>  static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
>>  {
>>       if (cfs_rq->load.weight)
>> @@ -7354,11 +7354,14 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
>>       return true;
>>  }
>>
>> +#ifdef CONFIG_FAIR_GROUP_SCHED
>> +
>>  static void update_blocked_averages(int cpu)
>>  {
>>       struct rq *rq = cpu_rq(cpu);
>>       struct cfs_rq *cfs_rq, *pos;
>>       struct rq_flags rf;
>> +     bool done = true;
>>
>>       rq_lock_irqsave(rq, &rf);
>>       update_rq_clock(rq);
>> @@ -7388,10 +7391,14 @@ static void update_blocked_averages(int cpu)
>>                */
>>               if (cfs_rq_is_decayed(cfs_rq))
>>                       list_del_leaf_cfs_rq(cfs_rq);
>> +             else
>> +                     done = false;
>>       }
>>
>>  #ifdef CONFIG_NO_HZ_COMMON
>>       rq->last_blocked_load_update_tick = jiffies;
>> +     if (done)
>> +             rq->has_blocked_load = 0;
>>  #endif
>>       rq_unlock_irqrestore(rq, &rf);
>>  }
>> @@ -7454,6 +7461,8 @@ static inline void update_blocked_averages(int cpu)
>>       update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
>>  #ifdef CONFIG_NO_HZ_COMMON
>>       rq->last_blocked_load_update_tick = jiffies;
>> +     if (cfs_rq_is_decayed(cfs_rq))
>> +             rq->has_blocked_load = 0;
>>  #endif
>>       rq_unlock_irqrestore(rq, &rf);
>>  }
>> @@ -7789,18 +7798,25 @@ group_type group_classify(struct sched_group *group,
>>       return group_other;
>>  }
>>
>> -static void update_nohz_stats(struct rq *rq)
>> +static bool update_nohz_stats(struct rq *rq)
>>  {
>>  #ifdef CONFIG_NO_HZ_COMMON
>>       unsigned int cpu = rq->cpu;
>>
>> +     if (!rq->has_blocked_load)
>> +             return false;
>> +
>>       if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
>> -             return;
>> +             return false;
>>
>>       if (!time_after(jiffies, rq->last_blocked_load_update_tick))
>
> I forgot to ask this on the initial thread - what's the point of this condition ? At first glance I would have said that this would make more sense:
>
> if (!time_after(jiffies, rq->last_blocked_load_update_tick + msecs_to_jiffies(LOAD_AVG_PERIOD))
>         return false;
>
> But maybe it's simply there to skip an update if it has already been done in the same jiffy interval ?

That's exactly the purpose.

>
>> -             return;
>> +             return true;
>>
>>       update_blocked_averages(cpu);
>> +
>> +     return rq->has_blocked_load;
>> +#else
>> +     return false;
>>  #endif
>>  }
>>
>> @@ -7826,8 +7842,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>>       for_each_cpu_and(i, sched_group_span(group), env->cpus) {
>>               struct rq *rq = cpu_rq(i);
>>
>> -             if (env->flags & LBF_NOHZ_STATS)
>> -                     update_nohz_stats(rq);
>> +             if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq))
>> +                     env->flags |= LBF_NOHZ_AGAIN;
>>
>>               /* Bias balancing toward cpus of our domain */
>>               if (local_group)
>> @@ -7979,18 +7995,15 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>>       struct sg_lb_stats *local = &sds->local_stat;
>>       struct sg_lb_stats tmp_sgs;
>>       int load_idx, prefer_sibling = 0;
>> +     int has_blocked = READ_ONCE(nohz.has_blocked);
>>       bool overload = false;
>>
>>       if (child && child->flags & SD_PREFER_SIBLING)
>>               prefer_sibling = 1;
>>
>>  #ifdef CONFIG_NO_HZ_COMMON
>> -     if (env->idle == CPU_NEWLY_IDLE) {
>> +     if (env->idle == CPU_NEWLY_IDLE && has_blocked)
>>               env->flags |= LBF_NOHZ_STATS;
>> -
>> -             if (cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd)))
>> -                     nohz.next_stats = jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD);
>> -     }
>>  #endif
>>
>>       load_idx = get_sd_load_idx(env->sd, env->idle);
>> @@ -8046,6 +8059,15 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>>               sg = sg->next;
>>       } while (sg != env->sd->groups);
>>
>> +#ifdef CONFIG_NO_HZ_COMMON
>> +     if ((env->flags & LBF_NOHZ_AGAIN) &&
>> +         cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd))) {
>> +
>> +             WRITE_ONCE(nohz.next_blocked,
>> +                             jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD));
>> +     }
>> +#endif
>> +
>>       if (env->sd->flags & SD_NUMA)
>>               env->fbq_type = fbq_classify_group(&sds->busiest_stat);
>>
>> @@ -9069,6 +9091,8 @@ static void nohz_balancer_kick(struct rq *rq)
>>       struct sched_domain *sd;
>>       int nr_busy, i, cpu = rq->cpu;
>>       unsigned int flags = 0;
>> +     unsigned long has_blocked = READ_ONCE(nohz.has_blocked);
>> +     unsigned long next = READ_ONCE(nohz.next_blocked);
>
> What about something slightly more explicit, e.g. next_stats/next_blocked ? There's also nohz.next_balance referenced here so IMO it's best to keep things clear.

ok

>
>>
>>       if (unlikely(rq->idle_balance))
>>               return;
>> @@ -9086,7 +9110,7 @@ static void nohz_balancer_kick(struct rq *rq)
>>       if (likely(!atomic_read(&nohz.nr_cpus)))
>>               return;
>>
>> -     if (time_after(now, nohz.next_stats))
>> +     if (time_after(now, next) && has_blocked)
>>               flags = NOHZ_STATS_KICK;
>>
>>       if (time_before(now, nohz.next_balance))
>> @@ -9207,13 +9231,15 @@ void nohz_balance_enter_idle(int cpu)
>>       if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
>>               return;
>>
>> +     rq->has_blocked_load = 1;
>> +
>>       if (rq->nohz_tick_stopped)
>> -             return;
>> +             goto out;
>>
>>       /*
>>        * If we're a completely isolated CPU, we don't play.
>>        */
>> -     if (on_null_domain(cpu_rq(cpu)))
>> +     if (on_null_domain(rq))
>>               return;
>>
>>       rq->nohz_tick_stopped = 1;
>> @@ -9222,6 +9248,13 @@ void nohz_balance_enter_idle(int cpu)
>>       atomic_inc(&nohz.nr_cpus);
>>
>>       set_cpu_sd_state_idle(cpu);
>> +
>> +out:
>> +     /*
>> +      * Each time a cpu enter idle, we assume that it has blocked load and
>> +      * enable the periodic update of the load of idle cpus
>> +      */
>> +     WRITE_ONCE(nohz.has_blocked, 1);
>>  }
>>  #else
>>  static inline void nohz_balancer_kick(struct rq *rq) { }
>> @@ -9374,6 +9407,16 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
>>
>>       SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
>>
>> +     /*
>> +      * We assume there will be no idle load after this update and clear
>> +      * the stats state. If a cpu enters idle in the mean time, it will
>
> s/stats state/has_blocked flag/ (repeated a few times in the comment block)

yes. I will update others as well

>
>> +      * set the stats state and trig another update of idle load.
>> +      * Because a cpu that becomes idle, is added to idle_cpus_mask before
>> +      * setting the stats state, we are sure to not clear the state and not
>> +      * check the load of an idle cpu.
>> +      */
>> +     WRITE_ONCE(nohz.has_blocked, 0);
>> +
>>       for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
>>               if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
>>                       continue;
>> @@ -9383,11 +9426,16 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
>>                * work being done for other cpus. Next load
>>                * balancing owner will pick it up.
>>                */
>> -             if (need_resched())
>> -                     break;
>> +             if (need_resched()) {
>> +                     has_blocked_load = true;
>> +                     goto abort;
>> +             }
>>
>>               rq = cpu_rq(balance_cpu);
>>
>> +             update_blocked_averages(rq->cpu);
>> +             has_blocked_load |= rq->has_blocked_load;
>> +
>>               /*
>>                * If time for next balance is due,
>>                * do the balance.
>> @@ -9400,7 +9448,6 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
>>                       cpu_load_update_idle(rq);
>>                       rq_unlock_irq(rq, &rf);
>>
>> -                     update_blocked_averages(rq->cpu);
>>                       if (flags & NOHZ_BALANCE_KICK)
>>                               rebalance_domains(rq, CPU_IDLE);
>>               }
>> @@ -9415,7 +9462,13 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
>>       if (flags & NOHZ_BALANCE_KICK)
>>               rebalance_domains(this_rq, CPU_IDLE);
>>
>> -     nohz.next_stats = next_stats;
>> +     WRITE_ONCE(nohz.next_blocked,
>> +             now + msecs_to_jiffies(LOAD_AVG_PERIOD));
>> +
>> +abort:
>> +     /* There is still blocked load, enable periodic update */
>> +     if (has_blocked_load)
>> +             WRITE_ONCE(nohz.has_blocked, 1);
>>
>>       /*
>>        * next_balance will be updated only when there is a need.
>> @@ -10046,6 +10099,7 @@ __init void init_sched_fair_class(void)
>>
>>  #ifdef CONFIG_NO_HZ_COMMON
>>       nohz.next_balance = jiffies;
>> +     nohz.next_blocked = jiffies;
>>       zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
>>  #endif
>>  #endif /* SMP */
>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>> index e200045..ad9b929 100644
>> --- a/kernel/sched/sched.h
>> +++ b/kernel/sched/sched.h
>> @@ -723,6 +723,7 @@ struct rq {
>>  #ifdef CONFIG_SMP
>>       unsigned long last_load_update_tick;
>>       unsigned long last_blocked_load_update_tick;
>> +     unsigned int has_blocked_load;
>>  #endif /* CONFIG_SMP */
>>       unsigned int nohz_tick_stopped;
>>       atomic_t nohz_flags;
>>

  reply	other threads:[~2018-02-06 14:31 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-21 10:21 [RFC PATCH 0/5] sched: On remote stats updates Peter Zijlstra
2017-12-21 10:21 ` [RFC PATCH 1/5] sched: Convert nohz_flags to atomic_t Peter Zijlstra
2017-12-21 10:21 ` [RFC PATCH 2/5] sched: Add NOHZ_STATS_KICK Peter Zijlstra
2017-12-21 16:23   ` Vincent Guittot
2017-12-21 16:56     ` Vincent Guittot
2017-12-22  7:59       ` Peter Zijlstra
2017-12-22  8:05         ` Vincent Guittot
2017-12-22  8:29           ` Peter Zijlstra
2017-12-22  9:12             ` Peter Zijlstra
2017-12-22 14:31               ` Peter Zijlstra
2017-12-22 14:34                 ` Vincent Guittot
2017-12-22 14:32               ` Vincent Guittot
2017-12-22 18:56                 ` Peter Zijlstra
2017-12-22 20:42                   ` Peter Zijlstra
2018-01-02 15:44                     ` Morten Rasmussen
2018-01-15  9:43                       ` Peter Zijlstra
2018-01-18 10:32                         ` Morten Rasmussen
2018-01-03  9:16                     ` Vincent Guittot
2018-01-15  8:26                       ` Vincent Guittot
2018-01-18 10:38                         ` Morten Rasmussen
2018-01-24  8:25                           ` Vincent Guittot
2018-01-29 18:43                             ` Dietmar Eggemann
2018-01-30  8:00                               ` Vincent Guittot
2018-01-29 19:31                             ` Valentin Schneider
2018-01-30  8:32                               ` Vincent Guittot
2018-01-30 11:41                                 ` Valentin Schneider
2018-01-30 13:05                                   ` Vincent Guittot
2018-02-05 22:18                                   ` Valentin Schneider
2018-02-06  9:22                                     ` Vincent Guittot
2018-02-01 18:16                               ` Peter Zijlstra
2018-02-01 16:57                             ` Peter Zijlstra
2018-02-01 17:26                               ` Vincent Guittot
2018-02-01 18:10                             ` Peter Zijlstra
2018-02-01 19:11                               ` Vincent Guittot
2018-02-06  8:32                               ` [PATCH 1/3] sched: Stop nohz stats when decayed Vincent Guittot
2018-02-06  8:32                                 ` [PATCH 2/3] sched: reduce the periodic update duration Vincent Guittot
2018-02-06  8:32                                 ` [PATCH 3/3] sched: update blocked load when newly idle Vincent Guittot
2018-02-06 14:32                                   ` Valentin Schneider
2018-02-06 16:17                                     ` Vincent Guittot
2018-02-06 16:32                                       ` Valentin Schneider
2018-02-06  8:55                                 ` [PATCH 1/3] sched: Stop nohz stats when decayed Vincent Guittot
2018-02-06 14:16                                 ` Valentin Schneider
2018-02-06 14:31                                   ` Vincent Guittot [this message]
2018-02-01 16:55                           ` [RFC PATCH 2/5] sched: Add NOHZ_STATS_KICK Peter Zijlstra
2018-01-22  9:40                         ` Dietmar Eggemann
2018-01-22 10:23                           ` Vincent Guittot
2018-02-01 16:52                         ` Peter Zijlstra
2018-02-01 17:25                           ` Vincent Guittot
2017-12-22  7:56     ` Peter Zijlstra
2017-12-22  8:04       ` Vincent Guittot
2017-12-21 10:21 ` [RFC PATCH 3/5] sched: Restructure nohz_balance_kick Peter Zijlstra
2017-12-21 10:21 ` [RFC PATCH 4/5] sched: Add nohz stats balancing Peter Zijlstra
2017-12-21 10:21 ` [RFC PATCH 5/5] sched: Update blocked load from NEWIDLE Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAKfTPtCfN4_GP+XjAjqWQA_G3_3cQ3XzWHRx2zFTpvvgS44uFA@mail.gmail.com \
    --to=vincent.guittot@linaro.org \
    --cc=brendan.jackman@arm.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=morten.rasmussen@foss.arm.com \
    --cc=peterz@infradead.org \
    --cc=valentin.schneider@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.