All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/2] sched/fair: nohz.next_balance vs newly-idle CPUs
@ 2021-08-23 11:16 Valentin Schneider
  2021-08-23 11:16 ` [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates Valentin Schneider
  2021-08-23 11:17 ` [PATCH v3 2/2] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle Valentin Schneider
  0 siblings, 2 replies; 13+ messages in thread
From: Valentin Schneider @ 2021-08-23 11:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Vincent Guittot, Dietmar Eggemann

Hi folks,

This was caught up by our testing on an arm64 RB5 board - that's an 8 CPUs
DynamIQ SoC with 4 littles, 3 mediums and 1 big. It seems to rely more on NOHZ
balancing than our other boards being tested, which highlighted that not
including a newly-idle CPU into nohz.next_balance can cause issues (especially
when the other CPUs have had their balance_interval inflated by pinned tasks).

As suggested by Vincent, the approach here is to mimic what was done for
nohz.has_blocked, which gives us sane(ish) ordering guarantees.

Revisions
=========

v2 -> v3
++++++++

o Rebased against latest tip/sched/core: 234b8ab6476c ("sched: Introduce
  dl_task_check_affinity() to check proposed affinity")
  
o Kept NOHZ_NEXT_KICK in NOHZ_KICK_MASK, but changed nohz_balancer_kick() to
  issue kicks with NOHZ_STATS_KICK | NOHZ_BALANCE_KICK instead (Dietmar)
o Added missing NOHZ_STATS_KICK gate for nohz.next_blocked update (Vincent)

v1 -> v2
++++++++

o Ditched the extra cpumasks and went with a sibling of nohz.has_blocked
  (Vincent) 

Cheers,
Valentin

Valentin Schneider (2):
  sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  sched/fair: Trigger nohz.next_balance updates when a CPU goes
    NOHZ-idle

 kernel/sched/fair.c  | 39 +++++++++++++++++++++++++++------------
 kernel/sched/sched.h |  8 +++++++-
 2 files changed, 34 insertions(+), 13 deletions(-)

--
2.25.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  2021-08-23 11:16 [PATCH v3 0/2] sched/fair: nohz.next_balance vs newly-idle CPUs Valentin Schneider
@ 2021-08-23 11:16 ` Valentin Schneider
  2021-08-23 11:59   ` Peter Zijlstra
                     ` (3 more replies)
  2021-08-23 11:17 ` [PATCH v3 2/2] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle Valentin Schneider
  1 sibling, 4 replies; 13+ messages in thread
From: Valentin Schneider @ 2021-08-23 11:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: Vincent Guittot, Peter Zijlstra, Ingo Molnar, Dietmar Eggemann

A following patch will trigger NOHZ idle balances as a means to update
nohz.next_balance. Vincent noted that blocked load updates can have
non-negligible overhead, which should be avoided if the intent is to only
update nohz.next_balance.

Add a new NOHZ balance kick flag, NOHZ_NEXT_KICK. Gate NOHZ blocked load
update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
expected.

Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
---
 kernel/sched/fair.c  | 24 ++++++++++++++----------
 kernel/sched/sched.h |  8 +++++++-
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6cd05f1d77ef..4a91f3027c92 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10342,7 +10342,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		goto out;
 
 	if (rq->nr_running >= 2) {
-		flags = NOHZ_KICK_MASK;
+		flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 		goto out;
 	}
 
@@ -10356,7 +10356,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 * on.
 		 */
 		if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 	}
@@ -10370,7 +10370,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 */
 		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
 			if (sched_asym_prefer(i, cpu)) {
-				flags = NOHZ_KICK_MASK;
+				flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 				goto unlock;
 			}
 		}
@@ -10383,7 +10383,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 * to run the misfit task on.
 		 */
 		if (check_misfit_status(rq, sd)) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 
@@ -10410,7 +10410,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 */
 		nr_busy = atomic_read(&sds->nr_busy_cpus);
 		if (nr_busy > 1) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 	}
@@ -10572,7 +10572,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	 * setting the flag, we are sure to not clear the state and not
 	 * check the load of an idle cpu.
 	 */
-	WRITE_ONCE(nohz.has_blocked, 0);
+	if (flags & NOHZ_STATS_KICK)
+		WRITE_ONCE(nohz.has_blocked, 0);
 
 	/*
 	 * Ensures that if we miss the CPU, we must see the has_blocked
@@ -10594,13 +10595,15 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 		 * balancing owner will pick it up.
 		 */
 		if (need_resched()) {
-			has_blocked_load = true;
+			if (flags & NOHZ_STATS_KICK)
+				has_blocked_load = true;
 			goto abort;
 		}
 
 		rq = cpu_rq(balance_cpu);
 
-		has_blocked_load |= update_nohz_stats(rq);
+		if (flags & NOHZ_STATS_KICK)
+			has_blocked_load |= update_nohz_stats(rq);
 
 		/*
 		 * If time for next balance is due,
@@ -10631,8 +10634,9 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	if (likely(update_next_balance))
 		nohz.next_balance = next_balance;
 
-	WRITE_ONCE(nohz.next_blocked,
-		now + msecs_to_jiffies(LOAD_AVG_PERIOD));
+	if (flags & NOHZ_STATS_KICK)
+		WRITE_ONCE(nohz.next_blocked,
+			   now + msecs_to_jiffies(LOAD_AVG_PERIOD));
 
 abort:
 	/* There is still blocked load, enable periodic update */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e7e2bba5b520..30b7bd2ef25d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2706,12 +2706,18 @@ extern void cfs_bandwidth_usage_dec(void);
 #define NOHZ_BALANCE_KICK_BIT	0
 #define NOHZ_STATS_KICK_BIT	1
 #define NOHZ_NEWILB_KICK_BIT	2
+#define NOHZ_NEXT_KICK_BIT	3
 
+/* Run rebalance_domains() */
 #define NOHZ_BALANCE_KICK	BIT(NOHZ_BALANCE_KICK_BIT)
+/* Update blocked load */
 #define NOHZ_STATS_KICK		BIT(NOHZ_STATS_KICK_BIT)
+/* Update blocked load when entering idle */
 #define NOHZ_NEWILB_KICK	BIT(NOHZ_NEWILB_KICK_BIT)
+/* Update nohz.next_balance */
+#define NOHZ_NEXT_KICK		BIT(NOHZ_NEXT_KICK_BIT)
 
-#define NOHZ_KICK_MASK	(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
+#define NOHZ_KICK_MASK	(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 2/2] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle
  2021-08-23 11:16 [PATCH v3 0/2] sched/fair: nohz.next_balance vs newly-idle CPUs Valentin Schneider
  2021-08-23 11:16 ` [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates Valentin Schneider
@ 2021-08-23 11:17 ` Valentin Schneider
  2021-08-24  9:08   ` Vincent Guittot
                     ` (2 more replies)
  1 sibling, 3 replies; 13+ messages in thread
From: Valentin Schneider @ 2021-08-23 11:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Vincent Guittot, Dietmar Eggemann

Consider a system with some NOHZ-idle CPUs, such that

  nohz.idle_cpus_mask = S
  nohz.next_balance = T

When a new CPU k goes NOHZ idle (nohz_balance_enter_idle()), we end up
with:

  nohz.idle_cpus_mask = S \U {k}
  nohz.next_balance = T

Note that the nohz.next_balance hasn't changed - it won't be updated until
a NOHZ balance is triggered. This is problematic if the newly NOHZ idle CPU
has an earlier rq.next_balance than the other NOHZ idle CPUs, IOW if:

  cpu_rq(k).next_balance < nohz.next_balance

In such scenarios, the existing nohz.next_balance will prevent any NOHZ
balance from happening, which itself will prevent nohz.next_balance from
being updated to this new cpu_rq(k).next_balance. Unnecessary load balance
delays of over 12ms caused by this were observed on an arm64 RB5 board.

Use the new nohz.needs_update flag to mark the presence of newly-idle CPUs
that need their rq->next_balance to be collated into
nohz.next_balance. Trigger a NOHZ_NEXT_KICK when the flag is set.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
---
 kernel/sched/fair.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4a91f3027c92..081a9e54058a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5754,6 +5754,7 @@ static struct {
 	cpumask_var_t idle_cpus_mask;
 	atomic_t nr_cpus;
 	int has_blocked;		/* Idle CPUS has blocked load */
+	int needs_update;		/* Newly idle CPUs need their next_balance collated */
 	unsigned long next_balance;     /* in jiffy units */
 	unsigned long next_blocked;	/* Next update of blocked load in jiffies */
 } nohz ____cacheline_aligned;
@@ -10417,6 +10418,9 @@ static void nohz_balancer_kick(struct rq *rq)
 unlock:
 	rcu_read_unlock();
 out:
+	if (READ_ONCE(nohz.needs_update))
+		flags |= NOHZ_NEXT_KICK;
+
 	if (flags)
 		kick_ilb(flags);
 }
@@ -10513,12 +10517,13 @@ void nohz_balance_enter_idle(int cpu)
 	/*
 	 * Ensures that if nohz_idle_balance() fails to observe our
 	 * @idle_cpus_mask store, it must observe the @has_blocked
-	 * store.
+	 * and @needs_update stores.
 	 */
 	smp_mb__after_atomic();
 
 	set_cpu_sd_state_idle(cpu);
 
+	WRITE_ONCE(nohz.needs_update, 1);
 out:
 	/*
 	 * Each time a cpu enter idle, we assume that it has blocked load and
@@ -10567,13 +10572,17 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	/*
 	 * We assume there will be no idle load after this update and clear
 	 * the has_blocked flag. If a cpu enters idle in the mean time, it will
-	 * set the has_blocked flag and trig another update of idle load.
+	 * set the has_blocked flag and trigger another update of idle load.
 	 * Because a cpu that becomes idle, is added to idle_cpus_mask before
 	 * setting the flag, we are sure to not clear the state and not
 	 * check the load of an idle cpu.
+	 *
+	 * Same applies to idle_cpus_mask vs needs_update.
 	 */
 	if (flags & NOHZ_STATS_KICK)
 		WRITE_ONCE(nohz.has_blocked, 0);
+	if (flags & NOHZ_NEXT_KICK)
+		WRITE_ONCE(nohz.needs_update, 0);
 
 	/*
 	 * Ensures that if we miss the CPU, we must see the has_blocked
@@ -10597,6 +10606,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 		if (need_resched()) {
 			if (flags & NOHZ_STATS_KICK)
 				has_blocked_load = true;
+			if (flags & NOHZ_NEXT_KICK)
+				WRITE_ONCE(nohz.needs_update, 1);
 			goto abort;
 		}
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  2021-08-23 11:16 ` [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates Valentin Schneider
@ 2021-08-23 11:59   ` Peter Zijlstra
  2021-08-23 12:57     ` Valentin Schneider
  2021-08-24  9:08   ` Vincent Guittot
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2021-08-23 11:59 UTC (permalink / raw)
  To: Valentin Schneider
  Cc: linux-kernel, Vincent Guittot, Ingo Molnar, Dietmar Eggemann

On Mon, Aug 23, 2021 at 12:16:59PM +0100, Valentin Schneider wrote:

> Gate NOHZ blocked load
> update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
> kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
> expected.

> @@ -10572,7 +10572,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>  	 * setting the flag, we are sure to not clear the state and not
>  	 * check the load of an idle cpu.
>  	 */
> -	WRITE_ONCE(nohz.has_blocked, 0);
> +	if (flags & NOHZ_STATS_KICK)
> +		WRITE_ONCE(nohz.has_blocked, 0);
>  
>  	/*
>  	 * Ensures that if we miss the CPU, we must see the has_blocked
> @@ -10594,13 +10595,15 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>  		 * balancing owner will pick it up.
>  		 */
>  		if (need_resched()) {
> -			has_blocked_load = true;
> +			if (flags & NOHZ_STATS_KICK)
> +				has_blocked_load = true;
>  			goto abort;
>  		}
>  
>  		rq = cpu_rq(balance_cpu);
>  
> -		has_blocked_load |= update_nohz_stats(rq);
> +		if (flags & NOHZ_STATS_KICK)
> +			has_blocked_load |= update_nohz_stats(rq);
>  
>  		/*
>  		 * If time for next balance is due,
> @@ -10631,8 +10634,9 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>  	if (likely(update_next_balance))
>  		nohz.next_balance = next_balance;
>  
> -	WRITE_ONCE(nohz.next_blocked,
> -		now + msecs_to_jiffies(LOAD_AVG_PERIOD));
> +	if (flags & NOHZ_STATS_KICK)
> +		WRITE_ONCE(nohz.next_blocked,
> +			   now + msecs_to_jiffies(LOAD_AVG_PERIOD));
>  
>  abort:
>  	/* There is still blocked load, enable periodic update */

I'm a bit puzzled by this; that function has:

  SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);

Which:

 - isn't updated
 - implies STATS must be set when BALANCE

the latter gives rise to my confusion; why add that gate on STATS? It
just doesn't make sense to do a BALANCE and not update STATS.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  2021-08-23 11:59   ` Peter Zijlstra
@ 2021-08-23 12:57     ` Valentin Schneider
  2021-08-23 13:53       ` Dietmar Eggemann
  0 siblings, 1 reply; 13+ messages in thread
From: Valentin Schneider @ 2021-08-23 12:57 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Vincent Guittot, Ingo Molnar, Dietmar Eggemann

On 23/08/21 13:59, Peter Zijlstra wrote:
> On Mon, Aug 23, 2021 at 12:16:59PM +0100, Valentin Schneider wrote:
>
>> Gate NOHZ blocked load
>> update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
>> kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
>> expected.
>
>> @@ -10572,7 +10572,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>>       * setting the flag, we are sure to not clear the state and not
>>       * check the load of an idle cpu.
>>       */
>> -	WRITE_ONCE(nohz.has_blocked, 0);
>> +	if (flags & NOHZ_STATS_KICK)
>> +		WRITE_ONCE(nohz.has_blocked, 0);
>>
>>      /*
>>       * Ensures that if we miss the CPU, we must see the has_blocked
>> @@ -10594,13 +10595,15 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>>               * balancing owner will pick it up.
>>               */
>>              if (need_resched()) {
>> -			has_blocked_load = true;
>> +			if (flags & NOHZ_STATS_KICK)
>> +				has_blocked_load = true;
>>                      goto abort;
>>              }
>>
>>              rq = cpu_rq(balance_cpu);
>>
>> -		has_blocked_load |= update_nohz_stats(rq);
>> +		if (flags & NOHZ_STATS_KICK)
>> +			has_blocked_load |= update_nohz_stats(rq);
>>
>>              /*
>>               * If time for next balance is due,
>> @@ -10631,8 +10634,9 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>>      if (likely(update_next_balance))
>>              nohz.next_balance = next_balance;
>>
>> -	WRITE_ONCE(nohz.next_blocked,
>> -		now + msecs_to_jiffies(LOAD_AVG_PERIOD));
>> +	if (flags & NOHZ_STATS_KICK)
>> +		WRITE_ONCE(nohz.next_blocked,
>> +			   now + msecs_to_jiffies(LOAD_AVG_PERIOD));
>>
>>  abort:
>>      /* There is still blocked load, enable periodic update */
>
> I'm a bit puzzled by this; that function has:
>
>   SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
>
> Which:
>
>  - isn't updated
>  - implies STATS must be set when BALANCE

Yup

>
> the latter gives rise to my confusion; why add that gate on STATS? It
> just doesn't make sense to do a BALANCE and not update STATS.

AFAIA that warning was only there to catch BALANCE && !STATS, so I didn't
tweak it.

Now, you could still end up with

  flags == NOHZ_NEXT_KICK

(e.g. nohz.next_balance is in the future, but a new CPU entered NOHZ-idle
and needs its own rq.next_balance collated into the nohz struct)

in which case you don't do any blocked load update, hence the
gate. In v1 I had that piggyback on NOHZ_STATS_KICK, but Vincent noted
that might not be the best given blocked load updates can be time
consuming - hence the separate flag.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  2021-08-23 12:57     ` Valentin Schneider
@ 2021-08-23 13:53       ` Dietmar Eggemann
  2021-08-24  8:11         ` Peter Zijlstra
  0 siblings, 1 reply; 13+ messages in thread
From: Dietmar Eggemann @ 2021-08-23 13:53 UTC (permalink / raw)
  To: Valentin Schneider, Peter Zijlstra
  Cc: linux-kernel, Vincent Guittot, Ingo Molnar

On 23/08/2021 14:57, Valentin Schneider wrote:
> On 23/08/21 13:59, Peter Zijlstra wrote:
>> On Mon, Aug 23, 2021 at 12:16:59PM +0100, Valentin Schneider wrote:
>>
>>> Gate NOHZ blocked load
>>> update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
>>> kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
>>> expected.
>>
>>> @@ -10572,7 +10572,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>>>       * setting the flag, we are sure to not clear the state and not
>>>       * check the load of an idle cpu.
>>>       */
>>> -	WRITE_ONCE(nohz.has_blocked, 0);
>>> +	if (flags & NOHZ_STATS_KICK)
>>> +		WRITE_ONCE(nohz.has_blocked, 0);
>>>
>>>      /*
>>>       * Ensures that if we miss the CPU, we must see the has_blocked
>>> @@ -10594,13 +10595,15 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>>>               * balancing owner will pick it up.
>>>               */
>>>              if (need_resched()) {
>>> -			has_blocked_load = true;
>>> +			if (flags & NOHZ_STATS_KICK)
>>> +				has_blocked_load = true;
>>>                      goto abort;
>>>              }
>>>
>>>              rq = cpu_rq(balance_cpu);
>>>
>>> -		has_blocked_load |= update_nohz_stats(rq);
>>> +		if (flags & NOHZ_STATS_KICK)
>>> +			has_blocked_load |= update_nohz_stats(rq);
>>>
>>>              /*
>>>               * If time for next balance is due,
>>> @@ -10631,8 +10634,9 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>>>      if (likely(update_next_balance))
>>>              nohz.next_balance = next_balance;
>>>
>>> -	WRITE_ONCE(nohz.next_blocked,
>>> -		now + msecs_to_jiffies(LOAD_AVG_PERIOD));
>>> +	if (flags & NOHZ_STATS_KICK)
>>> +		WRITE_ONCE(nohz.next_blocked,
>>> +			   now + msecs_to_jiffies(LOAD_AVG_PERIOD));
>>>
>>>  abort:
>>>      /* There is still blocked load, enable periodic update */
>>
>> I'm a bit puzzled by this; that function has:
>>
>>   SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
>>
>> Which:
>>
>>  - isn't updated
>>  - implies STATS must be set when BALANCE
> 
> Yup
> 
>>
>> the latter gives rise to my confusion; why add that gate on STATS? It
>> just doesn't make sense to do a BALANCE and not update STATS.
> 
> AFAIA that warning was only there to catch BALANCE && !STATS, so I didn't
> tweak it.
> 
> Now, you could still end up with
> 
>   flags == NOHZ_NEXT_KICK
> 
> (e.g. nohz.next_balance is in the future, but a new CPU entered NOHZ-idle
> and needs its own rq.next_balance collated into the nohz struct)
> 
> in which case you don't do any blocked load update, hence the
> gate. In v1 I had that piggyback on NOHZ_STATS_KICK, but Vincent noted
> that might not be the best given blocked load updates can be time
> consuming - hence the separate flag.

Maybe the confusion stems from the fact that the NOHZ_NEXT_KICK-set
changes are only introduced in 2/2?

@@ -10417,6 +10418,9 @@ static void nohz_balancer_kick(struct rq *rq)
 unlock:
 	rcu_read_unlock();
 out:
+	if (READ_ONCE(nohz.needs_update))
+		flags |= NOHZ_NEXT_KICK;
+

@@ -10513,12 +10517,13 @@ void nohz_balance_enter_idle(int cpu)

...

+	WRITE_ONCE(nohz.needs_update, 1);

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  2021-08-23 13:53       ` Dietmar Eggemann
@ 2021-08-24  8:11         ` Peter Zijlstra
  0 siblings, 0 replies; 13+ messages in thread
From: Peter Zijlstra @ 2021-08-24  8:11 UTC (permalink / raw)
  To: Dietmar Eggemann
  Cc: Valentin Schneider, linux-kernel, Vincent Guittot, Ingo Molnar

On Mon, Aug 23, 2021 at 03:53:16PM +0200, Dietmar Eggemann wrote:
> >> I'm a bit puzzled by this; that function has:
> >>
> >>   SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
> >>
> >> Which:
> >>
> >>  - isn't updated
> >>  - implies STATS must be set when BALANCE
> > 
> > Yup
> > 
> >>
> >> the latter gives rise to my confusion; why add that gate on STATS? It
> >> just doesn't make sense to do a BALANCE and not update STATS.
> > 
> > AFAIA that warning was only there to catch BALANCE && !STATS, so I didn't
> > tweak it.
> > 
> > Now, you could still end up with
> > 
> >   flags == NOHZ_NEXT_KICK
> > 
> > (e.g. nohz.next_balance is in the future, but a new CPU entered NOHZ-idle
> > and needs its own rq.next_balance collated into the nohz struct)
> > 
> > in which case you don't do any blocked load update, hence the
> > gate. In v1 I had that piggyback on NOHZ_STATS_KICK, but Vincent noted
> > that might not be the best given blocked load updates can be time
> > consuming - hence the separate flag.
> 
> Maybe the confusion stems from the fact that the NOHZ_NEXT_KICK-set
> changes are only introduced in 2/2?
> 
> @@ -10417,6 +10418,9 @@ static void nohz_balancer_kick(struct rq *rq)
>  unlock:
>  	rcu_read_unlock();
>  out:
> +	if (READ_ONCE(nohz.needs_update))
> +		flags |= NOHZ_NEXT_KICK;
> +

The confusion was about how we'd ever get there and not have STATS set,
but i guess having it all nicely gated does make it saner.

Thanks!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  2021-08-23 11:16 ` [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates Valentin Schneider
  2021-08-23 11:59   ` Peter Zijlstra
@ 2021-08-24  9:08   ` Vincent Guittot
  2021-09-09 11:18   ` [tip: sched/core] " tip-bot2 for Valentin Schneider
  2021-10-05 14:12   ` tip-bot2 for Valentin Schneider
  3 siblings, 0 replies; 13+ messages in thread
From: Vincent Guittot @ 2021-08-24  9:08 UTC (permalink / raw)
  To: Valentin Schneider
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Dietmar Eggemann

On Mon, 23 Aug 2021 at 13:17, Valentin Schneider
<valentin.schneider@arm.com> wrote:
>
> A following patch will trigger NOHZ idle balances as a means to update
> nohz.next_balance. Vincent noted that blocked load updates can have
> non-negligible overhead, which should be avoided if the intent is to only
> update nohz.next_balance.
>
> Add a new NOHZ balance kick flag, NOHZ_NEXT_KICK. Gate NOHZ blocked load
> update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
> kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
> expected.
>
> Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
> Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---
>  kernel/sched/fair.c  | 24 ++++++++++++++----------
>  kernel/sched/sched.h |  8 +++++++-
>  2 files changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 6cd05f1d77ef..4a91f3027c92 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -10342,7 +10342,7 @@ static void nohz_balancer_kick(struct rq *rq)
>                 goto out;
>
>         if (rq->nr_running >= 2) {
> -               flags = NOHZ_KICK_MASK;
> +               flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
>                 goto out;
>         }
>
> @@ -10356,7 +10356,7 @@ static void nohz_balancer_kick(struct rq *rq)
>                  * on.
>                  */
>                 if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
> -                       flags = NOHZ_KICK_MASK;
> +                       flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
>                         goto unlock;
>                 }
>         }
> @@ -10370,7 +10370,7 @@ static void nohz_balancer_kick(struct rq *rq)
>                  */
>                 for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
>                         if (sched_asym_prefer(i, cpu)) {
> -                               flags = NOHZ_KICK_MASK;
> +                               flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
>                                 goto unlock;
>                         }
>                 }
> @@ -10383,7 +10383,7 @@ static void nohz_balancer_kick(struct rq *rq)
>                  * to run the misfit task on.
>                  */
>                 if (check_misfit_status(rq, sd)) {
> -                       flags = NOHZ_KICK_MASK;
> +                       flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
>                         goto unlock;
>                 }
>
> @@ -10410,7 +10410,7 @@ static void nohz_balancer_kick(struct rq *rq)
>                  */
>                 nr_busy = atomic_read(&sds->nr_busy_cpus);
>                 if (nr_busy > 1) {
> -                       flags = NOHZ_KICK_MASK;
> +                       flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
>                         goto unlock;
>                 }
>         }
> @@ -10572,7 +10572,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>          * setting the flag, we are sure to not clear the state and not
>          * check the load of an idle cpu.
>          */
> -       WRITE_ONCE(nohz.has_blocked, 0);
> +       if (flags & NOHZ_STATS_KICK)
> +               WRITE_ONCE(nohz.has_blocked, 0);
>
>         /*
>          * Ensures that if we miss the CPU, we must see the has_blocked
> @@ -10594,13 +10595,15 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>                  * balancing owner will pick it up.
>                  */
>                 if (need_resched()) {
> -                       has_blocked_load = true;
> +                       if (flags & NOHZ_STATS_KICK)
> +                               has_blocked_load = true;
>                         goto abort;
>                 }
>
>                 rq = cpu_rq(balance_cpu);
>
> -               has_blocked_load |= update_nohz_stats(rq);
> +               if (flags & NOHZ_STATS_KICK)
> +                       has_blocked_load |= update_nohz_stats(rq);
>
>                 /*
>                  * If time for next balance is due,
> @@ -10631,8 +10634,9 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>         if (likely(update_next_balance))
>                 nohz.next_balance = next_balance;
>
> -       WRITE_ONCE(nohz.next_blocked,
> -               now + msecs_to_jiffies(LOAD_AVG_PERIOD));
> +       if (flags & NOHZ_STATS_KICK)
> +               WRITE_ONCE(nohz.next_blocked,
> +                          now + msecs_to_jiffies(LOAD_AVG_PERIOD));
>
>  abort:
>         /* There is still blocked load, enable periodic update */
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index e7e2bba5b520..30b7bd2ef25d 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2706,12 +2706,18 @@ extern void cfs_bandwidth_usage_dec(void);
>  #define NOHZ_BALANCE_KICK_BIT  0
>  #define NOHZ_STATS_KICK_BIT    1
>  #define NOHZ_NEWILB_KICK_BIT   2
> +#define NOHZ_NEXT_KICK_BIT     3
>
> +/* Run rebalance_domains() */
>  #define NOHZ_BALANCE_KICK      BIT(NOHZ_BALANCE_KICK_BIT)
> +/* Update blocked load */
>  #define NOHZ_STATS_KICK                BIT(NOHZ_STATS_KICK_BIT)
> +/* Update blocked load when entering idle */
>  #define NOHZ_NEWILB_KICK       BIT(NOHZ_NEWILB_KICK_BIT)
> +/* Update nohz.next_balance */
> +#define NOHZ_NEXT_KICK         BIT(NOHZ_NEXT_KICK_BIT)
>
> -#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
> +#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)
>
>  #define nohz_flags(cpu)        (&cpu_rq(cpu)->nohz_flags)
>
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 2/2] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle
  2021-08-23 11:17 ` [PATCH v3 2/2] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle Valentin Schneider
@ 2021-08-24  9:08   ` Vincent Guittot
  2021-09-09 11:18   ` [tip: sched/core] " tip-bot2 for Valentin Schneider
  2021-10-05 14:12   ` tip-bot2 for Valentin Schneider
  2 siblings, 0 replies; 13+ messages in thread
From: Vincent Guittot @ 2021-08-24  9:08 UTC (permalink / raw)
  To: Valentin Schneider
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Dietmar Eggemann

On Mon, 23 Aug 2021 at 13:17, Valentin Schneider
<valentin.schneider@arm.com> wrote:
>
> Consider a system with some NOHZ-idle CPUs, such that
>
>   nohz.idle_cpus_mask = S
>   nohz.next_balance = T
>
> When a new CPU k goes NOHZ idle (nohz_balance_enter_idle()), we end up
> with:
>
>   nohz.idle_cpus_mask = S \U {k}
>   nohz.next_balance = T
>
> Note that the nohz.next_balance hasn't changed - it won't be updated until
> a NOHZ balance is triggered. This is problematic if the newly NOHZ idle CPU
> has an earlier rq.next_balance than the other NOHZ idle CPUs, IOW if:
>
>   cpu_rq(k).next_balance < nohz.next_balance
>
> In such scenarios, the existing nohz.next_balance will prevent any NOHZ
> balance from happening, which itself will prevent nohz.next_balance from
> being updated to this new cpu_rq(k).next_balance. Unnecessary load balance
> delays of over 12ms caused by this were observed on an arm64 RB5 board.
>
> Use the new nohz.needs_update flag to mark the presence of newly-idle CPUs
> that need their rq->next_balance to be collated into
> nohz.next_balance. Trigger a NOHZ_NEXT_KICK when the flag is set.
>
> Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---
>  kernel/sched/fair.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 4a91f3027c92..081a9e54058a 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5754,6 +5754,7 @@ static struct {
>         cpumask_var_t idle_cpus_mask;
>         atomic_t nr_cpus;
>         int has_blocked;                /* Idle CPUS has blocked load */
> +       int needs_update;               /* Newly idle CPUs need their next_balance collated */
>         unsigned long next_balance;     /* in jiffy units */
>         unsigned long next_blocked;     /* Next update of blocked load in jiffies */
>  } nohz ____cacheline_aligned;
> @@ -10417,6 +10418,9 @@ static void nohz_balancer_kick(struct rq *rq)
>  unlock:
>         rcu_read_unlock();
>  out:
> +       if (READ_ONCE(nohz.needs_update))
> +               flags |= NOHZ_NEXT_KICK;
> +
>         if (flags)
>                 kick_ilb(flags);
>  }
> @@ -10513,12 +10517,13 @@ void nohz_balance_enter_idle(int cpu)
>         /*
>          * Ensures that if nohz_idle_balance() fails to observe our
>          * @idle_cpus_mask store, it must observe the @has_blocked
> -        * store.
> +        * and @needs_update stores.
>          */
>         smp_mb__after_atomic();
>
>         set_cpu_sd_state_idle(cpu);
>
> +       WRITE_ONCE(nohz.needs_update, 1);
>  out:
>         /*
>          * Each time a cpu enter idle, we assume that it has blocked load and
> @@ -10567,13 +10572,17 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>         /*
>          * We assume there will be no idle load after this update and clear
>          * the has_blocked flag. If a cpu enters idle in the mean time, it will
> -        * set the has_blocked flag and trig another update of idle load.
> +        * set the has_blocked flag and trigger another update of idle load.
>          * Because a cpu that becomes idle, is added to idle_cpus_mask before
>          * setting the flag, we are sure to not clear the state and not
>          * check the load of an idle cpu.
> +        *
> +        * Same applies to idle_cpus_mask vs needs_update.
>          */
>         if (flags & NOHZ_STATS_KICK)
>                 WRITE_ONCE(nohz.has_blocked, 0);
> +       if (flags & NOHZ_NEXT_KICK)
> +               WRITE_ONCE(nohz.needs_update, 0);
>
>         /*
>          * Ensures that if we miss the CPU, we must see the has_blocked
> @@ -10597,6 +10606,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
>                 if (need_resched()) {
>                         if (flags & NOHZ_STATS_KICK)
>                                 has_blocked_load = true;
> +                       if (flags & NOHZ_NEXT_KICK)
> +                               WRITE_ONCE(nohz.needs_update, 1);
>                         goto abort;
>                 }
>
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [tip: sched/core] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle
  2021-08-23 11:17 ` [PATCH v3 2/2] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle Valentin Schneider
  2021-08-24  9:08   ` Vincent Guittot
@ 2021-09-09 11:18   ` tip-bot2 for Valentin Schneider
  2021-10-05 14:12   ` tip-bot2 for Valentin Schneider
  2 siblings, 0 replies; 13+ messages in thread
From: tip-bot2 for Valentin Schneider @ 2021-09-09 11:18 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Valentin Schneider, Peter Zijlstra (Intel),
	Vincent Guittot, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     df100a6682d3d9d4b7cbb531a3f783035732ba92
Gitweb:        https://git.kernel.org/tip/df100a6682d3d9d4b7cbb531a3f783035732ba92
Author:        Valentin Schneider <valentin.schneider@arm.com>
AuthorDate:    Mon, 23 Aug 2021 12:17:00 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 09 Sep 2021 11:27:30 +02:00

sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle

Consider a system with some NOHZ-idle CPUs, such that

  nohz.idle_cpus_mask = S
  nohz.next_balance = T

When a new CPU k goes NOHZ idle (nohz_balance_enter_idle()), we end up
with:

  nohz.idle_cpus_mask = S \U {k}
  nohz.next_balance = T

Note that the nohz.next_balance hasn't changed - it won't be updated until
a NOHZ balance is triggered. This is problematic if the newly NOHZ idle CPU
has an earlier rq.next_balance than the other NOHZ idle CPUs, IOW if:

  cpu_rq(k).next_balance < nohz.next_balance

In such scenarios, the existing nohz.next_balance will prevent any NOHZ
balance from happening, which itself will prevent nohz.next_balance from
being updated to this new cpu_rq(k).next_balance. Unnecessary load balance
delays of over 12ms caused by this were observed on an arm64 RB5 board.

Use the new nohz.needs_update flag to mark the presence of newly-idle CPUs
that need their rq->next_balance to be collated into
nohz.next_balance. Trigger a NOHZ_NEXT_KICK when the flag is set.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210823111700.2842997-3-valentin.schneider@arm.com
---
 kernel/sched/fair.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 48ce754..2a5efde 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5754,6 +5754,7 @@ static struct {
 	cpumask_var_t idle_cpus_mask;
 	atomic_t nr_cpus;
 	int has_blocked;		/* Idle CPUS has blocked load */
+	int needs_update;		/* Newly idle CPUs need their next_balance collated */
 	unsigned long next_balance;     /* in jiffy units */
 	unsigned long next_blocked;	/* Next update of blocked load in jiffies */
 } nohz ____cacheline_aligned;
@@ -10417,6 +10418,9 @@ static void nohz_balancer_kick(struct rq *rq)
 unlock:
 	rcu_read_unlock();
 out:
+	if (READ_ONCE(nohz.needs_update))
+		flags |= NOHZ_NEXT_KICK;
+
 	if (flags)
 		kick_ilb(flags);
 }
@@ -10513,12 +10517,13 @@ void nohz_balance_enter_idle(int cpu)
 	/*
 	 * Ensures that if nohz_idle_balance() fails to observe our
 	 * @idle_cpus_mask store, it must observe the @has_blocked
-	 * store.
+	 * and @needs_update stores.
 	 */
 	smp_mb__after_atomic();
 
 	set_cpu_sd_state_idle(cpu);
 
+	WRITE_ONCE(nohz.needs_update, 1);
 out:
 	/*
 	 * Each time a cpu enter idle, we assume that it has blocked load and
@@ -10567,13 +10572,17 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	/*
 	 * We assume there will be no idle load after this update and clear
 	 * the has_blocked flag. If a cpu enters idle in the mean time, it will
-	 * set the has_blocked flag and trig another update of idle load.
+	 * set the has_blocked flag and trigger another update of idle load.
 	 * Because a cpu that becomes idle, is added to idle_cpus_mask before
 	 * setting the flag, we are sure to not clear the state and not
 	 * check the load of an idle cpu.
+	 *
+	 * Same applies to idle_cpus_mask vs needs_update.
 	 */
 	if (flags & NOHZ_STATS_KICK)
 		WRITE_ONCE(nohz.has_blocked, 0);
+	if (flags & NOHZ_NEXT_KICK)
+		WRITE_ONCE(nohz.needs_update, 0);
 
 	/*
 	 * Ensures that if we miss the CPU, we must see the has_blocked
@@ -10597,6 +10606,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 		if (need_resched()) {
 			if (flags & NOHZ_STATS_KICK)
 				has_blocked_load = true;
+			if (flags & NOHZ_NEXT_KICK)
+				WRITE_ONCE(nohz.needs_update, 1);
 			goto abort;
 		}
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip: sched/core] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  2021-08-23 11:16 ` [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates Valentin Schneider
  2021-08-23 11:59   ` Peter Zijlstra
  2021-08-24  9:08   ` Vincent Guittot
@ 2021-09-09 11:18   ` tip-bot2 for Valentin Schneider
  2021-10-05 14:12   ` tip-bot2 for Valentin Schneider
  3 siblings, 0 replies; 13+ messages in thread
From: tip-bot2 for Valentin Schneider @ 2021-09-09 11:18 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Vincent Guittot, Valentin Schneider, Peter Zijlstra (Intel),
	x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     013ce5ed58f799a2f035b732f904f6ebd8e8d881
Gitweb:        https://git.kernel.org/tip/013ce5ed58f799a2f035b732f904f6ebd8e8d881
Author:        Valentin Schneider <valentin.schneider@arm.com>
AuthorDate:    Mon, 23 Aug 2021 12:16:59 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 09 Sep 2021 11:27:29 +02:00

sched/fair: Add NOHZ balancer flag for nohz.next_balance updates

A following patch will trigger NOHZ idle balances as a means to update
nohz.next_balance. Vincent noted that blocked load updates can have
non-negligible overhead, which should be avoided if the intent is to only
update nohz.next_balance.

Add a new NOHZ balance kick flag, NOHZ_NEXT_KICK. Gate NOHZ blocked load
update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
expected.

Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210823111700.2842997-2-valentin.schneider@arm.com
---
 kernel/sched/fair.c  | 24 ++++++++++++++----------
 kernel/sched/sched.h |  8 +++++++-
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7b3e859..48ce754 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10342,7 +10342,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		goto out;
 
 	if (rq->nr_running >= 2) {
-		flags = NOHZ_KICK_MASK;
+		flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 		goto out;
 	}
 
@@ -10356,7 +10356,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 * on.
 		 */
 		if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 	}
@@ -10370,7 +10370,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 */
 		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
 			if (sched_asym_prefer(i, cpu)) {
-				flags = NOHZ_KICK_MASK;
+				flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 				goto unlock;
 			}
 		}
@@ -10383,7 +10383,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 * to run the misfit task on.
 		 */
 		if (check_misfit_status(rq, sd)) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 
@@ -10410,7 +10410,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 */
 		nr_busy = atomic_read(&sds->nr_busy_cpus);
 		if (nr_busy > 1) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 	}
@@ -10572,7 +10572,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	 * setting the flag, we are sure to not clear the state and not
 	 * check the load of an idle cpu.
 	 */
-	WRITE_ONCE(nohz.has_blocked, 0);
+	if (flags & NOHZ_STATS_KICK)
+		WRITE_ONCE(nohz.has_blocked, 0);
 
 	/*
 	 * Ensures that if we miss the CPU, we must see the has_blocked
@@ -10594,13 +10595,15 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 		 * balancing owner will pick it up.
 		 */
 		if (need_resched()) {
-			has_blocked_load = true;
+			if (flags & NOHZ_STATS_KICK)
+				has_blocked_load = true;
 			goto abort;
 		}
 
 		rq = cpu_rq(balance_cpu);
 
-		has_blocked_load |= update_nohz_stats(rq);
+		if (flags & NOHZ_STATS_KICK)
+			has_blocked_load |= update_nohz_stats(rq);
 
 		/*
 		 * If time for next balance is due,
@@ -10631,8 +10634,9 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	if (likely(update_next_balance))
 		nohz.next_balance = next_balance;
 
-	WRITE_ONCE(nohz.next_blocked,
-		now + msecs_to_jiffies(LOAD_AVG_PERIOD));
+	if (flags & NOHZ_STATS_KICK)
+		WRITE_ONCE(nohz.next_blocked,
+			   now + msecs_to_jiffies(LOAD_AVG_PERIOD));
 
 abort:
 	/* There is still blocked load, enable periodic update */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e7e2bba..30b7bd2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2706,12 +2706,18 @@ extern void cfs_bandwidth_usage_dec(void);
 #define NOHZ_BALANCE_KICK_BIT	0
 #define NOHZ_STATS_KICK_BIT	1
 #define NOHZ_NEWILB_KICK_BIT	2
+#define NOHZ_NEXT_KICK_BIT	3
 
+/* Run rebalance_domains() */
 #define NOHZ_BALANCE_KICK	BIT(NOHZ_BALANCE_KICK_BIT)
+/* Update blocked load */
 #define NOHZ_STATS_KICK		BIT(NOHZ_STATS_KICK_BIT)
+/* Update blocked load when entering idle */
 #define NOHZ_NEWILB_KICK	BIT(NOHZ_NEWILB_KICK_BIT)
+/* Update nohz.next_balance */
+#define NOHZ_NEXT_KICK		BIT(NOHZ_NEXT_KICK_BIT)
 
-#define NOHZ_KICK_MASK	(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
+#define NOHZ_KICK_MASK	(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip: sched/core] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle
  2021-08-23 11:17 ` [PATCH v3 2/2] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle Valentin Schneider
  2021-08-24  9:08   ` Vincent Guittot
  2021-09-09 11:18   ` [tip: sched/core] " tip-bot2 for Valentin Schneider
@ 2021-10-05 14:12   ` tip-bot2 for Valentin Schneider
  2 siblings, 0 replies; 13+ messages in thread
From: tip-bot2 for Valentin Schneider @ 2021-10-05 14:12 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Valentin Schneider, Peter Zijlstra (Intel),
	Vincent Guittot, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     7fd7a9e0caba10829b4f8db1aa7711b558681fd4
Gitweb:        https://git.kernel.org/tip/7fd7a9e0caba10829b4f8db1aa7711b558681fd4
Author:        Valentin Schneider <valentin.schneider@arm.com>
AuthorDate:    Mon, 23 Aug 2021 12:17:00 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 05 Oct 2021 15:51:31 +02:00

sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle

Consider a system with some NOHZ-idle CPUs, such that

  nohz.idle_cpus_mask = S
  nohz.next_balance = T

When a new CPU k goes NOHZ idle (nohz_balance_enter_idle()), we end up
with:

  nohz.idle_cpus_mask = S \U {k}
  nohz.next_balance = T

Note that the nohz.next_balance hasn't changed - it won't be updated until
a NOHZ balance is triggered. This is problematic if the newly NOHZ idle CPU
has an earlier rq.next_balance than the other NOHZ idle CPUs, IOW if:

  cpu_rq(k).next_balance < nohz.next_balance

In such scenarios, the existing nohz.next_balance will prevent any NOHZ
balance from happening, which itself will prevent nohz.next_balance from
being updated to this new cpu_rq(k).next_balance. Unnecessary load balance
delays of over 12ms caused by this were observed on an arm64 RB5 board.

Use the new nohz.needs_update flag to mark the presence of newly-idle CPUs
that need their rq->next_balance to be collated into
nohz.next_balance. Trigger a NOHZ_NEXT_KICK when the flag is set.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210823111700.2842997-3-valentin.schneider@arm.com
---
 kernel/sched/fair.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f4de7f5..6cc958e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5787,6 +5787,7 @@ static struct {
 	cpumask_var_t idle_cpus_mask;
 	atomic_t nr_cpus;
 	int has_blocked;		/* Idle CPUS has blocked load */
+	int needs_update;		/* Newly idle CPUs need their next_balance collated */
 	unsigned long next_balance;     /* in jiffy units */
 	unsigned long next_blocked;	/* Next update of blocked load in jiffies */
 } nohz ____cacheline_aligned;
@@ -10450,6 +10451,9 @@ static void nohz_balancer_kick(struct rq *rq)
 unlock:
 	rcu_read_unlock();
 out:
+	if (READ_ONCE(nohz.needs_update))
+		flags |= NOHZ_NEXT_KICK;
+
 	if (flags)
 		kick_ilb(flags);
 }
@@ -10546,12 +10550,13 @@ void nohz_balance_enter_idle(int cpu)
 	/*
 	 * Ensures that if nohz_idle_balance() fails to observe our
 	 * @idle_cpus_mask store, it must observe the @has_blocked
-	 * store.
+	 * and @needs_update stores.
 	 */
 	smp_mb__after_atomic();
 
 	set_cpu_sd_state_idle(cpu);
 
+	WRITE_ONCE(nohz.needs_update, 1);
 out:
 	/*
 	 * Each time a cpu enter idle, we assume that it has blocked load and
@@ -10600,13 +10605,17 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	/*
 	 * We assume there will be no idle load after this update and clear
 	 * the has_blocked flag. If a cpu enters idle in the mean time, it will
-	 * set the has_blocked flag and trig another update of idle load.
+	 * set the has_blocked flag and trigger another update of idle load.
 	 * Because a cpu that becomes idle, is added to idle_cpus_mask before
 	 * setting the flag, we are sure to not clear the state and not
 	 * check the load of an idle cpu.
+	 *
+	 * Same applies to idle_cpus_mask vs needs_update.
 	 */
 	if (flags & NOHZ_STATS_KICK)
 		WRITE_ONCE(nohz.has_blocked, 0);
+	if (flags & NOHZ_NEXT_KICK)
+		WRITE_ONCE(nohz.needs_update, 0);
 
 	/*
 	 * Ensures that if we miss the CPU, we must see the has_blocked
@@ -10630,6 +10639,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 		if (need_resched()) {
 			if (flags & NOHZ_STATS_KICK)
 				has_blocked_load = true;
+			if (flags & NOHZ_NEXT_KICK)
+				WRITE_ONCE(nohz.needs_update, 1);
 			goto abort;
 		}
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip: sched/core] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
  2021-08-23 11:16 ` [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates Valentin Schneider
                     ` (2 preceding siblings ...)
  2021-09-09 11:18   ` [tip: sched/core] " tip-bot2 for Valentin Schneider
@ 2021-10-05 14:12   ` tip-bot2 for Valentin Schneider
  3 siblings, 0 replies; 13+ messages in thread
From: tip-bot2 for Valentin Schneider @ 2021-10-05 14:12 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Vincent Guittot, Valentin Schneider, Peter Zijlstra (Intel),
	x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     efd984c481abb516fab8bafb25bf41fd9397a43c
Gitweb:        https://git.kernel.org/tip/efd984c481abb516fab8bafb25bf41fd9397a43c
Author:        Valentin Schneider <valentin.schneider@arm.com>
AuthorDate:    Mon, 23 Aug 2021 12:16:59 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 05 Oct 2021 15:51:30 +02:00

sched/fair: Add NOHZ balancer flag for nohz.next_balance updates

A following patch will trigger NOHZ idle balances as a means to update
nohz.next_balance. Vincent noted that blocked load updates can have
non-negligible overhead, which should be avoided if the intent is to only
update nohz.next_balance.

Add a new NOHZ balance kick flag, NOHZ_NEXT_KICK. Gate NOHZ blocked load
update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
expected.

Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210823111700.2842997-2-valentin.schneider@arm.com
---
 kernel/sched/fair.c  | 24 ++++++++++++++----------
 kernel/sched/sched.h |  8 +++++++-
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f6a05d9..f4de7f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10375,7 +10375,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		goto out;
 
 	if (rq->nr_running >= 2) {
-		flags = NOHZ_KICK_MASK;
+		flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 		goto out;
 	}
 
@@ -10389,7 +10389,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 * on.
 		 */
 		if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 	}
@@ -10403,7 +10403,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 */
 		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
 			if (sched_asym_prefer(i, cpu)) {
-				flags = NOHZ_KICK_MASK;
+				flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 				goto unlock;
 			}
 		}
@@ -10416,7 +10416,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 * to run the misfit task on.
 		 */
 		if (check_misfit_status(rq, sd)) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 
@@ -10443,7 +10443,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 */
 		nr_busy = atomic_read(&sds->nr_busy_cpus);
 		if (nr_busy > 1) {
-			flags = NOHZ_KICK_MASK;
+			flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 			goto unlock;
 		}
 	}
@@ -10605,7 +10605,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	 * setting the flag, we are sure to not clear the state and not
 	 * check the load of an idle cpu.
 	 */
-	WRITE_ONCE(nohz.has_blocked, 0);
+	if (flags & NOHZ_STATS_KICK)
+		WRITE_ONCE(nohz.has_blocked, 0);
 
 	/*
 	 * Ensures that if we miss the CPU, we must see the has_blocked
@@ -10627,13 +10628,15 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 		 * balancing owner will pick it up.
 		 */
 		if (need_resched()) {
-			has_blocked_load = true;
+			if (flags & NOHZ_STATS_KICK)
+				has_blocked_load = true;
 			goto abort;
 		}
 
 		rq = cpu_rq(balance_cpu);
 
-		has_blocked_load |= update_nohz_stats(rq);
+		if (flags & NOHZ_STATS_KICK)
+			has_blocked_load |= update_nohz_stats(rq);
 
 		/*
 		 * If time for next balance is due,
@@ -10664,8 +10667,9 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	if (likely(update_next_balance))
 		nohz.next_balance = next_balance;
 
-	WRITE_ONCE(nohz.next_blocked,
-		now + msecs_to_jiffies(LOAD_AVG_PERIOD));
+	if (flags & NOHZ_STATS_KICK)
+		WRITE_ONCE(nohz.next_blocked,
+			   now + msecs_to_jiffies(LOAD_AVG_PERIOD));
 
 abort:
 	/* There is still blocked load, enable periodic update */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3d3e579..1fec313 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2709,12 +2709,18 @@ extern void cfs_bandwidth_usage_dec(void);
 #define NOHZ_BALANCE_KICK_BIT	0
 #define NOHZ_STATS_KICK_BIT	1
 #define NOHZ_NEWILB_KICK_BIT	2
+#define NOHZ_NEXT_KICK_BIT	3
 
+/* Run rebalance_domains() */
 #define NOHZ_BALANCE_KICK	BIT(NOHZ_BALANCE_KICK_BIT)
+/* Update blocked load */
 #define NOHZ_STATS_KICK		BIT(NOHZ_STATS_KICK_BIT)
+/* Update blocked load when entering idle */
 #define NOHZ_NEWILB_KICK	BIT(NOHZ_NEWILB_KICK_BIT)
+/* Update nohz.next_balance */
+#define NOHZ_NEXT_KICK		BIT(NOHZ_NEXT_KICK_BIT)
 
-#define NOHZ_KICK_MASK	(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
+#define NOHZ_KICK_MASK	(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-10-05 14:13 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-23 11:16 [PATCH v3 0/2] sched/fair: nohz.next_balance vs newly-idle CPUs Valentin Schneider
2021-08-23 11:16 ` [PATCH v3 1/2] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates Valentin Schneider
2021-08-23 11:59   ` Peter Zijlstra
2021-08-23 12:57     ` Valentin Schneider
2021-08-23 13:53       ` Dietmar Eggemann
2021-08-24  8:11         ` Peter Zijlstra
2021-08-24  9:08   ` Vincent Guittot
2021-09-09 11:18   ` [tip: sched/core] " tip-bot2 for Valentin Schneider
2021-10-05 14:12   ` tip-bot2 for Valentin Schneider
2021-08-23 11:17 ` [PATCH v3 2/2] sched/fair: Trigger nohz.next_balance updates when a CPU goes NOHZ-idle Valentin Schneider
2021-08-24  9:08   ` Vincent Guittot
2021-09-09 11:18   ` [tip: sched/core] " tip-bot2 for Valentin Schneider
2021-10-05 14:12   ` tip-bot2 for Valentin Schneider

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.