[GIT,PULL] scheduler fixes

Message ID 20191116213742.GA7450@gmail.com
State Superseded

Commit Message

Ingo Molnar Nov. 16, 2019, 9:37 p.m. UTC
Linus,

Please pull the latest sched-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched-urgent-for-linus

   # HEAD: 48a723d23b0d957e5b5861b974864e53c6841de8 sched/topology, cpuset: Account for housekeeping CPUs to avoid empty cpumasks

Misc fixes:

 - Fix potential deadlock under CONFIG_DEBUG_OBJECTS=y
 - PELT metrics update ordering fix
 - uclamp logic fix
 - a type casting fix
 - final fix (hopefully) for Juno r0 2+4 big.LITTLE systems.

 Thanks,

	Ingo

------------------>
Peter Zijlstra (1):
      sched/core: Avoid spurious lock dependencies

Qais Yousef (1):
      sched/uclamp: Fix incorrect condition

Valentin Schneider (2):
      sched/uclamp: Fix overzealous type replacement
      sched/topology, cpuset: Account for housekeeping CPUs to avoid empty cpumasks

Vincent Guittot (1):
      sched/pelt: Fix update of blocked PELT ordering


 kernel/cgroup/cpuset.c |  8 +++++++-
 kernel/sched/core.c    |  9 +++++----
 kernel/sched/fair.c    | 29 ++++++++++++++++++++---------
 kernel/sched/sched.h   |  2 +-
 4 files changed, 33 insertions(+), 15 deletions(-)

Comments

Valentin Schneider Nov. 16, 2019, 10:44 p.m. UTC | #1
Hi,

On 16/11/2019 21:37, Ingo Molnar wrote:
> Peter Zijlstra (1):
>       sched/core: Avoid spurious lock dependencies
> 
> Qais Yousef (1):
>       sched/uclamp: Fix incorrect condition
> 
> Valentin Schneider (2):
>       sched/uclamp: Fix overzealous type replacement

This one got a v2 (was missing one location), acked by Vincent:

  20191115103908.27610-1-valentin.schneider@arm.com

>       sched/topology, cpuset: Account for housekeeping CPUs to avoid empty cpumasks

And this one is no longer needed, as Michal & I understood (IOW the fix in
rc6 is sufficient), see:

  c425c5cb-ba8a-e5f6-d91c-5479779cfb7a@arm.com

> 
> Vincent Guittot (1):
>       sched/pelt: Fix update of blocked PELT ordering
> 
> 
>  kernel/cgroup/cpuset.c |  8 +++++++-
>  kernel/sched/core.c    |  9 +++++----
>  kernel/sched/fair.c    | 29 ++++++++++++++++++++---------
>  kernel/sched/sched.h   |  2 +-
>  4 files changed, 33 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index c87ee6412b36..e4c10785dc7c 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -798,8 +798,14 @@ static int generate_sched_domains(cpumask_var_t **domains,
>  		    cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
>  			continue;
>  
> +		/*
> +		 * Skip cpusets that would lead to an empty sched domain.
> +		 * That could be because effective_cpus is empty, or because
> +		 * it's only spanning CPUs outside the housekeeping mask.
> +		 */
>  		if (is_sched_load_balance(cp) &&
> -		    !cpumask_empty(cp->effective_cpus))
> +		    cpumask_intersects(cp->effective_cpus,
> +				       housekeeping_cpumask(HK_FLAG_DOMAIN)))
>  			csa[csn++] = cp;
>  
>  		/* skip @cp's subtree if not a partition root */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 0f2eb3629070..a4f76d3f5011 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -853,7 +853,7 @@ static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
>  }
>  
>  static inline
> -enum uclamp_id uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
> +unsigned int uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
>  				   unsigned int clamp_value)
>  {
>  	struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
> @@ -918,7 +918,7 @@ uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id)
>  	return uc_req;
>  }
>  
> -enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
> +unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
>  {
>  	struct uclamp_se uc_eff;
>  
> @@ -1065,7 +1065,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
>  	 * affecting a valid clamp bucket, the next time it's enqueued,
>  	 * it will already see the updated clamp bucket value.
>  	 */
> -	if (!p->uclamp[clamp_id].active) {
> +	if (p->uclamp[clamp_id].active) {
>  		uclamp_rq_dec_id(rq, p, clamp_id);
>  		uclamp_rq_inc_id(rq, p, clamp_id);
>  	}
> @@ -6019,10 +6019,11 @@ void init_idle(struct task_struct *idle, int cpu)
>  	struct rq *rq = cpu_rq(cpu);
>  	unsigned long flags;
>  
> +	__sched_fork(0, idle);
> +
>  	raw_spin_lock_irqsave(&idle->pi_lock, flags);
>  	raw_spin_lock(&rq->lock);
>  
> -	__sched_fork(0, idle);
>  	idle->state = TASK_RUNNING;
>  	idle->se.exec_start = sched_clock();
>  	idle->flags |= PF_IDLE;
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 22a2fed29054..69a81a5709ff 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7547,6 +7547,19 @@ static void update_blocked_averages(int cpu)
>  	rq_lock_irqsave(rq, &rf);
>  	update_rq_clock(rq);
>  
> +	/*
> +	 * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure
> +	 * that RT, DL and IRQ signals have been updated before updating CFS.
> +	 */
> +	curr_class = rq->curr->sched_class;
> +	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
> +	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
> +	update_irq_load_avg(rq, 0);
> +
> +	/* Don't need periodic decay once load/util_avg are null */
> +	if (others_have_blocked(rq))
> +		done = false;
> +
>  	/*
>  	 * Iterates the task_group tree in a bottom up fashion, see
>  	 * list_add_leaf_cfs_rq() for details.
> @@ -7574,14 +7587,6 @@ static void update_blocked_averages(int cpu)
>  			done = false;
>  	}
>  
> -	curr_class = rq->curr->sched_class;
> -	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
> -	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
> -	update_irq_load_avg(rq, 0);
> -	/* Don't need periodic decay once load/util_avg are null */
> -	if (others_have_blocked(rq))
> -		done = false;
> -
>  	update_blocked_load_status(rq, !done);
>  	rq_unlock_irqrestore(rq, &rf);
>  }
> @@ -7642,12 +7647,18 @@ static inline void update_blocked_averages(int cpu)
>  
>  	rq_lock_irqsave(rq, &rf);
>  	update_rq_clock(rq);
> -	update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
>  
> +	/*
> +	 * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure
> +	 * that RT, DL and IRQ signals have been updated before updating CFS.
> +	 */
>  	curr_class = rq->curr->sched_class;
>  	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
>  	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
>  	update_irq_load_avg(rq, 0);
> +
> +	update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
> +
>  	update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq));
>  	rq_unlock_irqrestore(rq, &rf);
>  }
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index c8870c5bd7df..49ed949f850c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2309,7 +2309,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
>  #endif /* CONFIG_CPU_FREQ */
>  
>  #ifdef CONFIG_UCLAMP_TASK
> -enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
> +unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
>  
>  static __always_inline
>  unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
>
Linus Torvalds Nov. 17, 2019, 12:10 a.m. UTC | #2
On Sat, Nov 16, 2019 at 2:44 PM Valentin Schneider
<valentin.schneider@arm.com> wrote:
>
> > Valentin Schneider (2):
> >       sched/uclamp: Fix overzealous type replacement
>
> This one got a v2 (was missing one location), acked by Vincent:
>
>   20191115103908.27610-1-valentin.schneider@arm.com
>
> >       sched/topology, cpuset: Account for housekeeping CPUs to avoid empty cpumasks
>
> And this one is no longer needed, as Michal & I understood (IOW the fix in
> rc6 is sufficient), see:
>
>   c425c5cb-ba8a-e5f6-d91c-5479779cfb7a@arm.com

Ingo, what do you want me to do? Pull it anyway and send updates
later? Or skip this pull request?

I'll leave it pending for now,

              Linus
Ingo Molnar Nov. 17, 2019, 9:31 a.m. UTC | #3
* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Sat, Nov 16, 2019 at 2:44 PM Valentin Schneider
> <valentin.schneider@arm.com> wrote:
> >
> > > Valentin Schneider (2):
> > >       sched/uclamp: Fix overzealous type replacement
> >
> > This one got a v2 (was missing one location), acked by Vincent:
> >
> >   20191115103908.27610-1-valentin.schneider@arm.com
> >
> > >       sched/topology, cpuset: Account for housekeeping CPUs to avoid empty cpumasks
> >
> > And this one is no longer needed, as Michal & I understood (IOW the fix in
> > rc6 is sufficient), see:
> >
> >   c425c5cb-ba8a-e5f6-d91c-5479779cfb7a@arm.com
> 
> Ingo, what do you want me to do? Pull it anyway and send updates
> later? Or skip this pull request?
> 
> I'll leave it pending for now,

Yeah, please don't pull - will rework it. Sorry ...

Thanks,

	Ingo
Ingo Molnar Nov. 17, 2019, 9:45 a.m. UTC | #4
* Valentin Schneider <valentin.schneider@arm.com> wrote:

> Hi,
> 
> On 16/11/2019 21:37, Ingo Molnar wrote:
> > Peter Zijlstra (1):
> >       sched/core: Avoid spurious lock dependencies
> > 
> > Qais Yousef (1):
> >       sched/uclamp: Fix incorrect condition
> > 
> > Valentin Schneider (2):
> >       sched/uclamp: Fix overzealous type replacement
> 
> This one got a v2 (was missing one location), acked by Vincent:
> 
>   20191115103908.27610-1-valentin.schneider@arm.com

I've picked v2 up instead. I suspect it's not really consequential as 
enums don't really get truncated by compilers, right? Is there any other 
negative runtime side effect possible from the imprecise enum/uint 
typing?

> >       sched/topology, cpuset: Account for housekeeping CPUs to avoid empty cpumasks
> 
> And this one is no longer needed, as Michal & I understood (IOW the fix in
> rc6 is sufficient), see:
> 
>   c425c5cb-ba8a-e5f6-d91c-5479779cfb7a@arm.com

Ok.

I'm inclined to just reduce sched/urgent back to these three fixes:

  6e1ff0773f49: sched/uclamp: Fix incorrect condition
  b90f7c9d2198: sched/pelt: Fix update of blocked PELT ordering
  ff51ff84d82a: sched/core: Avoid spurious lock dependencies

and apply v2 of the uclamp_id type fix to sched/core. This would reduce 
the risks of a Sunday pull request ...

Thanks,

	Ingo
Valentin Schneider Nov. 17, 2019, 10:19 a.m. UTC | #5
On 17/11/2019 09:45, Ingo Molnar wrote:
> I've picked v2 up instead. I suspect it's not really consequential as 
> enums don't really get truncated by compilers, right? Is there any other 
> negative runtime side effect possible from the imprecise enum/uint 
> typing?
> 

AFAIUI the requirement for the enum type is that it has to be an int type that
covers all its values, so I could see some funky optimization (e.g. check the
returned value is < 512 but it's assumed the type for the enum is 8 bits so
this becomes always true). Then again we don't have any explicit check on
those returned values, plus they fit in 11 bits, so as you say it's
most likely inconsequential (and I didn't see any compile diff).
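
To illustrate what I mean, a minimal hypothetical sketch (made-up names,
not actual kernel code):

enum tiny_id { TINY_A, TINY_B };	/* enumerators fit in 8 bits */

/*
 * Declared to return the enum, but callers actually push clamp-style
 * values up to 1023 through it -- roughly the pattern the uclamp type
 * fix addresses.
 */
static enum tiny_id get_id(unsigned int raw)
{
	return (enum tiny_id)raw;
}

int fits(unsigned int raw)
{
	/*
	 * With the default ABI the enum is int-sized and this test is
	 * meaningful.  If the compiler could assume an 8-bit enum type
	 * (e.g. -fshort-enums), only 0..255 would be representable and
	 * "< 512" could be folded to always-true.
	 */
	return get_id(raw) < 512;
}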

My "worry" wasn't really about this patch, it was more about the following
one - I didn't like the idea of merging an unneeded patch (with a Fixes:
tag on top of it).

>>>       sched/topology, cpuset: Account for housekeeping CPUs to avoid empty cpumasks
>>
>> And this one is no longer needed, as Michal & I understood (IOW the fix in
>> rc6 is sufficient), see:
>>
>>   c425c5cb-ba8a-e5f6-d91c-5479779cfb7a@arm.com
> 
> Ok.
> 
> I'm inclined to just reduce sched/urgent back to these three fixes:
> 
>   6e1ff0773f49: sched/uclamp: Fix incorrect condition
>   b90f7c9d2198: sched/pelt: Fix update of blocked PELT ordering
>   ff51ff84d82a: sched/core: Avoid spurious lock dependencies
> 
> and apply v2 of the uclamp_id type fix to sched/core. This would reduce 
> the risks of a Sunday pull request ...
> 

This sounds good to me. Sorry for the hassle.

> Thanks,
> 
> 	Ingo
>
Ingo Molnar Nov. 17, 2019, 10:29 a.m. UTC | #6
* Valentin Schneider <valentin.schneider@arm.com> wrote:

> On 17/11/2019 09:45, Ingo Molnar wrote:
> > I've picked v2 up instead. I suspect it's not really consequential as 
> > enums don't really get truncated by compilers, right? Is there any other 
> > negative runtime side effect possible from the imprecise enum/uint 
> > typing?
> > 
> 
> AFAIUI the requirement for the enum type is that it has to be an int type that
> covers all its values, so I could see some funky optimization (e.g. check the
> returned value is < 512 but it's assumed the type for the enum is 8 bits so
> this becomes always true). Then again we don't have any explicit check on
> those returned values, plus they fit in 11 bits, so as you say it's
> most likely inconsequential (and I didn't see any compile diff).

Yeah, so unless there's evidence of there being a nonzero chance of this 
being misbuilt I'd gravitate towards doing this via sched/core, 
especially so late in the cycle.

> My "worry" wasn't really about this patch, it was more about the 
> following one - I didn't like the idea of merging an unneeded patch 
> (with a Fixes: tag on top of it).

Yeah, agreed - should be fixed now.

> >>>       sched/topology, cpuset: Account for housekeeping CPUs to avoid empty cpumasks
> >>
> >> And this one is no longer needed, as Michal & I understood (IOW the fix in
> >> rc6 is sufficient), see:
> >>
> >>   c425c5cb-ba8a-e5f6-d91c-5479779cfb7a@arm.com
> > 
> > Ok.
> > 
> > I'm inclined to just reduce sched/urgent back to these three fixes:
> > 
> >   6e1ff0773f49: sched/uclamp: Fix incorrect condition
> >   b90f7c9d2198: sched/pelt: Fix update of blocked PELT ordering
> >   ff51ff84d82a: sched/core: Avoid spurious lock dependencies
> > 
> > and apply v2 of the uclamp_id type fix to sched/core. This would reduce 
> > the risks of a Sunday pull request ...
> > 
> 
> This sounds good to me. Sorry for the hassle.

No hassle at all - thanks for catching these!

Thanks,

	Ingo
Linus Torvalds Nov. 17, 2019, 4:29 p.m. UTC | #7
On Sun, Nov 17, 2019 at 2:20 AM Valentin Schneider
<valentin.schneider@arm.com> wrote:
>
> AFAIUI the requirement for the enum type is that it has to be an int type that
> covers all its values, so I could see some funky optimization (e.g. check the
> returned value is < 512 but it's assumed the type for the enum is 8 bits so
> this becomes always true). Then again we don't have any explicit check on
> those returned values, plus they fit in 11 bits, so as you say it's
> most likely inconsequential (and I didn't see any compile diff).

Gcc can - and does - narrow enums to smaller integer types with the
'-fshort-enums' flag.

However, in practice nobody uses that, and it can cause interop
problems. So I think for us, enums are always at least 'int' (they can
be bigger).

That said, mixing enums and values that are bigger than the enumerated
ones is just a bad idea.

It will, for example, cause us to miss compiler warnings (eg switch
statements with an enum will warn if you don't handle all cases, but
the 'all cases' is based on the actual enum range, not on the
_possible_ invalid values).
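
Something like this (hypothetical) sketch shows the problem:

enum clamp_kind { KIND_MIN, KIND_MAX };

const char *kind_name(enum clamp_kind k)
{
	switch (k) {
	case KIND_MIN:
		return "min";
	case KIND_MAX:
		return "max";
	}
	/*
	 * -Wswitch is happy: every declared enumerator is handled.  But if
	 * callers smuggle arbitrary integers through 'k', those values fall
	 * through to here with no warning at all -- the "all cases" check
	 * only knows about the declared enum range.
	 */
	return "bogus";
}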

                     Linus
Valentin Schneider Nov. 17, 2019, 8:43 p.m. UTC | #8
On 17/11/2019 16:29, Linus Torvalds wrote:
> Gcc can - and does - narrow enums to smaller integer types with the
> '-fshort-enums' flag.
> 
> However, in practice nobody uses that, and it can cause interop
> problems. So I think for us, enums are always at least 'int' (they can
> be bigger).
> 
> That said, mixing enums and values that are bigger than the enumerated
> ones is just a bad idea.
> 
> It will, for example, cause us to miss compiler warnings (eg switch
> statements with an enum will warn if you don't handle all cases, but
> the 'all cases' is based on the actual enum range, not on the
> _possible_ invalid values).
> 

Oh, yet another gcc flag... 

Thanks for the detailed write-up.

>                      Linus
>
Ingo Molnar Nov. 18, 2019, 8:03 a.m. UTC | #9
* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Sun, Nov 17, 2019 at 2:20 AM Valentin Schneider
> <valentin.schneider@arm.com> wrote:
> >
> > AFAIUI the requirement for the enum type is that it has to be an int 
> > type that covers all its values, so I could see some funky 
> > optimization (e.g. check the returned value is < 512 but it's assumed 
> > the type for the enum is 8 bits so this becomes always true). Then 
> > again we don't have any explicit check on those returned values, plus 
> > they fit in 11 bits, so as you say it's most likely inconsequential 
> > (and I didn't see any compile diff).
> 
> Gcc can - and does - narrow enums to smaller integer types with the 
> '-fshort-enums' flag.

Good point - but at least according to the GCC 9.2.1 documentation, 
-fshort-enums is a non-default code generation option:

   Options for Code Generation Conventions

       These machine-independent options control the interface 
       conventions used in code generation.

       Most of them have both positive and negative forms; the negative 
       form of -ffoo is -fno-foo.  In the table below, only one of the 
                                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       forms is listed---the one that is not the default.  You can figure 
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       out the other form by either removing no- or adding it.

   [...]

       -fshort-enums

           Allocate to an "enum" type only as many bytes as it needs for 
           the declared range of possible values.  Specifically, the 
           "enum" type is equivalent to the smallest integer type that 
           has enough room.

           Warning: the -fshort-enums switch causes GCC to generate code 
           that is not binary compatible with code generated without that 
           switch.  Use it to conform to a non-default application binary 
           interface.

Unless this option is used, AFAIK GCC will treat enums as "int" if at 
least one enumeration constant is negative, and as "unsigned int" 
otherwise.
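
A quick sanity check of that default behaviour (hypothetical snippet,
not kernel code):

enum all_pos { P0, P1 };		/* no negative constant  */
enum has_neg { N_NEG = -1, N0 };	/* one negative constant */

/* Promotion/comparison follows the underlying type GCC picked: */
_Static_assert((enum all_pos)-1 > 0, "all_pos behaves like unsigned int");
_Static_assert((enum has_neg)-1 < 0, "has_neg behaves like int");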

The only current reference to the non-standard -fshort-enums option 
within the kernel source is the Hexagon arch, which (seemingly 
unnecessarily) disables the option:

  arch/hexagon/Makefile:KBUILD_CFLAGS += -fno-short-enums

That flag came with the original Hexagon commits, 8 years ago:

  e95bf452a9e22   (Richard Kuo    2011-10-31 18:55:58 -0500       10)# Do not use single-byte enums; these will overflow.
  e95bf452a9e22   (Richard Kuo    2011-10-31 18:55:58 -0500       11)KBUILD_CFLAGS += -fno-short-enums

Maybe they had a GCC build where it was on by default? Or GCC changed 
this option sometime in the past? Or it's simply an unnecessary but 
harmless code generation flag out of paranoia?

Out of curiosity I searched all the historic trees, none ever made use of 
the -f*short-enums option, so I don't think this is a GCC option we ever 
actively utilized or ran into.

> However, in practice nobody uses that, and it can cause interop 
> problems. So I think for us, enums are always at least 'int' (they can 
> be bigger).

Yeah, the GCC documentation specifically warns that it breaks the ABI: 
the size of structs using enums will generally change from 4 bytes to 1 
or 2 bytes, and function call signatures will change incompatibly as 
well.
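
For instance (hypothetical example), a struct like this changes size:

#include <stdio.h>

enum small { VAL_A, VAL_B, VAL_C };

struct msg {
	enum small	type;	/* 4 bytes by default, 1 byte with -fshort-enums */
	unsigned char	flag;
};

int main(void)
{
	/*
	 * Default build on x86-64: sizeof(struct msg) == 8 (4-byte enum,
	 * 1-byte flag, 3 bytes of padding).  With -fshort-enums the enum
	 * shrinks to one byte and the struct to 2 bytes, so objects built
	 * with and without the flag cannot safely be linked together.
	 */
	printf("sizeof(struct msg) = %zu\n", sizeof(struct msg));
	return 0;
}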

BTW, -fshort-enums looks like a bad code generation option to me, on x86 
at least, because it will also use 16-bit width, which is generally a bad 
idea on x86. If it limited itself to u8 and 32-bit types it could even be 
useful.

Also, I wouldn't be surprised if the kernel ABI broke if we attempted to 
use -fshort-enums; I bet there's a lot of accidental reliance on 
enum=int/uint.

> That said, mixing enums and values that are bigger than the enumerated 
> ones is just a bad idea
> 
> It will, for example, cause us to miss compiler warnings (eg switch 
> statements with an enum will warn if you don't handle all cases, but 
> the 'all cases' is based on the actual enum range, not on the 
> _possible_ invalid values).

That's true. Will check whether we can do something about improving the 
affected uclamp data structures.

Thanks,

	Ingo

Patch

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index c87ee6412b36..e4c10785dc7c 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -798,8 +798,14 @@  static int generate_sched_domains(cpumask_var_t **domains,
 		    cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
 			continue;
 
+		/*
+		 * Skip cpusets that would lead to an empty sched domain.
+		 * That could be because effective_cpus is empty, or because
+		 * it's only spanning CPUs outside the housekeeping mask.
+		 */
 		if (is_sched_load_balance(cp) &&
-		    !cpumask_empty(cp->effective_cpus))
+		    cpumask_intersects(cp->effective_cpus,
+				       housekeeping_cpumask(HK_FLAG_DOMAIN)))
 			csa[csn++] = cp;
 
 		/* skip @cp's subtree if not a partition root */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0f2eb3629070..a4f76d3f5011 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -853,7 +853,7 @@  static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
 }
 
 static inline
-enum uclamp_id uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
+unsigned int uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
 				   unsigned int clamp_value)
 {
 	struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
@@ -918,7 +918,7 @@  uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id)
 	return uc_req;
 }
 
-enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
+unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
 {
 	struct uclamp_se uc_eff;
 
@@ -1065,7 +1065,7 @@  uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
 	 * affecting a valid clamp bucket, the next time it's enqueued,
 	 * it will already see the updated clamp bucket value.
 	 */
-	if (!p->uclamp[clamp_id].active) {
+	if (p->uclamp[clamp_id].active) {
 		uclamp_rq_dec_id(rq, p, clamp_id);
 		uclamp_rq_inc_id(rq, p, clamp_id);
 	}
@@ -6019,10 +6019,11 @@  void init_idle(struct task_struct *idle, int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
 
+	__sched_fork(0, idle);
+
 	raw_spin_lock_irqsave(&idle->pi_lock, flags);
 	raw_spin_lock(&rq->lock);
 
-	__sched_fork(0, idle);
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 	idle->flags |= PF_IDLE;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 22a2fed29054..69a81a5709ff 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7547,6 +7547,19 @@  static void update_blocked_averages(int cpu)
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
 
+	/*
+	 * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure
+	 * that RT, DL and IRQ signals have been updated before updating CFS.
+	 */
+	curr_class = rq->curr->sched_class;
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
+	update_irq_load_avg(rq, 0);
+
+	/* Don't need periodic decay once load/util_avg are null */
+	if (others_have_blocked(rq))
+		done = false;
+
 	/*
 	 * Iterates the task_group tree in a bottom up fashion, see
 	 * list_add_leaf_cfs_rq() for details.
@@ -7574,14 +7587,6 @@  static void update_blocked_averages(int cpu)
 			done = false;
 	}
 
-	curr_class = rq->curr->sched_class;
-	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
-	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
-	update_irq_load_avg(rq, 0);
-	/* Don't need periodic decay once load/util_avg are null */
-	if (others_have_blocked(rq))
-		done = false;
-
 	update_blocked_load_status(rq, !done);
 	rq_unlock_irqrestore(rq, &rf);
 }
@@ -7642,12 +7647,18 @@  static inline void update_blocked_averages(int cpu)
 
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
-	update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
 
+	/*
+	 * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure
+	 * that RT, DL and IRQ signals have been updated before updating CFS.
+	 */
 	curr_class = rq->curr->sched_class;
 	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
 	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
 	update_irq_load_avg(rq, 0);
+
+	update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
+
 	update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq));
 	rq_unlock_irqrestore(rq, &rf);
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8870c5bd7df..49ed949f850c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2309,7 +2309,7 @@  static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef CONFIG_UCLAMP_TASK
-enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
 
 static __always_inline
 unsigned int uclamp_util_with(struct rq *rq, unsigned int util,