linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
@ 2022-11-10 19:57 Sam Wu
  2022-11-15 22:35 ` Saravana Kannan
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Sam Wu @ 2022-11-10 19:57 UTC (permalink / raw)
  To: Rafael J. Wysocki, Viresh Kumar, Ingo Molnar, Peter Zijlstra,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Valentin Schneider, Lukasz Luba
  Cc: Sam Wu, Saravana Kannan, Isaac J . Manjarres, kernel-team,
	Rafael J. Wysocki, linux-pm, linux-kernel

This reverts commit 6d5afdc97ea71958287364a1f1d07e59ef151b11.

On a Pixel 6 device, it is observed that this commit increases
latency by approximately 50ms, or 20%, in migrating a task
that requires full CPU utilization from a LITTLE CPU to Fmax
on a big CPU. Reverting this change restores the latency back
to its original baseline value.

Fixes: 6d5afdc97ea7 ("cpufreq: schedutil: Move max CPU capacity to sugov_policy")
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Isaac J. Manjarres <isaacmanjarres@google.com>
Signed-off-by: Sam Wu <wusamuel@google.com>
---
 kernel/sched/cpufreq_schedutil.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 9161d1136d01..1207c78f85c1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -25,9 +25,6 @@ struct sugov_policy {
 	unsigned int		next_freq;
 	unsigned int		cached_raw_freq;
 
-	/* max CPU capacity, which is equal for all CPUs in freq. domain */
-	unsigned long		max;
-
 	/* The next fields are only needed if fast switch cannot be used: */
 	struct			irq_work irq_work;
 	struct			kthread_work work;
@@ -51,6 +48,7 @@ struct sugov_cpu {
 
 	unsigned long		util;
 	unsigned long		bw_dl;
+	unsigned long		max;
 
 	/* The field below is for single-CPU policies only: */
 #ifdef CONFIG_NO_HZ_COMMON
@@ -160,6 +158,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
 {
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
 
+	sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu);
 	sg_cpu->bw_dl = cpu_bw_dl(rq);
 	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
 					  FREQUENCY_UTIL, NULL);
@@ -254,7 +253,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
  */
 static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
 {
-	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	unsigned long boost;
 
 	/* No boost currently required */
@@ -282,8 +280,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
 	 * sg_cpu->util is already in capacity scale; convert iowait_boost
 	 * into the same scale so we can compare.
 	 */
-	boost = sg_cpu->iowait_boost * sg_policy->max;
-	boost >>= SCHED_CAPACITY_SHIFT;
+	boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
 	boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
 	if (sg_cpu->util < boost)
 		sg_cpu->util = boost;
@@ -340,7 +337,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
 	if (!sugov_update_single_common(sg_cpu, time, flags))
 		return;
 
-	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max);
+	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
@@ -376,7 +373,6 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 				     unsigned int flags)
 {
 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
-	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	unsigned long prev_util = sg_cpu->util;
 
 	/*
@@ -403,8 +399,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 		sg_cpu->util = prev_util;
 
 	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
-				   map_util_perf(sg_cpu->util),
-				   sg_policy->max);
+				   map_util_perf(sg_cpu->util), sg_cpu->max);
 
 	sg_cpu->sg_policy->last_freq_update_time = time;
 }
@@ -413,19 +408,25 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	unsigned long util = 0;
+	unsigned long util = 0, max = 1;
 	unsigned int j;
 
 	for_each_cpu(j, policy->cpus) {
 		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
+		unsigned long j_util, j_max;
 
 		sugov_get_util(j_sg_cpu);
 		sugov_iowait_apply(j_sg_cpu, time);
+		j_util = j_sg_cpu->util;
+		j_max = j_sg_cpu->max;
 
-		util = max(j_sg_cpu->util, util);
+		if (j_util * max > j_max * util) {
+			util = j_util;
+			max = j_max;
+		}
 	}
 
-	return get_next_freq(sg_policy, util, sg_policy->max);
+	return get_next_freq(sg_policy, util, max);
 }
 
 static void
@@ -751,7 +752,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy = policy->governor_data;
 	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
-	unsigned int cpu = cpumask_first(policy->cpus);
+	unsigned int cpu;
 
 	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
 	sg_policy->last_freq_update_time	= 0;
@@ -759,7 +760,6 @@ static int sugov_start(struct cpufreq_policy *policy)
 	sg_policy->work_in_progress		= false;
 	sg_policy->limits_changed		= false;
 	sg_policy->cached_raw_freq		= 0;
-	sg_policy->max				= arch_scale_cpu_capacity(cpu);
 
 	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
 
-- 
2.38.1.431.g37b22c650d-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-10 19:57 [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Sam Wu
@ 2022-11-15 22:35 ` Saravana Kannan
  2022-11-16 11:43   ` Lukasz Luba
  2022-11-16 11:35 ` Lukasz Luba
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Saravana Kannan @ 2022-11-15 22:35 UTC (permalink / raw)
  To: Sam Wu
  Cc: Rafael J. Wysocki, Viresh Kumar, Ingo Molnar, Peter Zijlstra,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Valentin Schneider, Lukasz Luba, Isaac J . Manjarres,
	kernel-team, Rafael J. Wysocki, linux-pm, linux-kernel

On Thu, Nov 10, 2022 at 11:57 AM Sam Wu <wusamuel@google.com> wrote:
>
> This reverts commit 6d5afdc97ea71958287364a1f1d07e59ef151b11.
>
> On a Pixel 6 device, it is observed that this commit increases
> latency by approximately 50ms, or 20%, in migrating a task
> that requires full CPU utilization from a LITTLE CPU to Fmax
> on a big CPU. Reverting this change restores the latency back
> to its original baseline value.
>
> Fixes: 6d5afdc97ea7 ("cpufreq: schedutil: Move max CPU capacity to sugov_policy")
> Cc: Lukasz Luba <lukasz.luba@arm.com>
> Cc: Saravana Kannan <saravanak@google.com>
> Cc: Isaac J. Manjarres <isaacmanjarres@google.com>
> Signed-off-by: Sam Wu <wusamuel@google.com>

Rafael, can we pick this up please?

Lukasz, no objections to the idea itself, but it's causing regression
and we'd like to revert and then fix it.

-Saravana

> ---
>  kernel/sched/cpufreq_schedutil.c | 30 +++++++++++++++---------------
>  1 file changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 9161d1136d01..1207c78f85c1 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -25,9 +25,6 @@ struct sugov_policy {
>         unsigned int            next_freq;
>         unsigned int            cached_raw_freq;
>
> -       /* max CPU capacity, which is equal for all CPUs in freq. domain */
> -       unsigned long           max;
> -
>         /* The next fields are only needed if fast switch cannot be used: */
>         struct                  irq_work irq_work;
>         struct                  kthread_work work;
> @@ -51,6 +48,7 @@ struct sugov_cpu {
>
>         unsigned long           util;
>         unsigned long           bw_dl;
> +       unsigned long           max;
>
>         /* The field below is for single-CPU policies only: */
>  #ifdef CONFIG_NO_HZ_COMMON
> @@ -160,6 +158,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
>  {
>         struct rq *rq = cpu_rq(sg_cpu->cpu);
>
> +       sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu);
>         sg_cpu->bw_dl = cpu_bw_dl(rq);
>         sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
>                                           FREQUENCY_UTIL, NULL);
> @@ -254,7 +253,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
>   */
>  static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
>  {
> -       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
>         unsigned long boost;
>
>         /* No boost currently required */
> @@ -282,8 +280,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
>          * sg_cpu->util is already in capacity scale; convert iowait_boost
>          * into the same scale so we can compare.
>          */
> -       boost = sg_cpu->iowait_boost * sg_policy->max;
> -       boost >>= SCHED_CAPACITY_SHIFT;
> +       boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
>         boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
>         if (sg_cpu->util < boost)
>                 sg_cpu->util = boost;
> @@ -340,7 +337,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
>         if (!sugov_update_single_common(sg_cpu, time, flags))
>                 return;
>
> -       next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max);
> +       next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
>         /*
>          * Do not reduce the frequency if the CPU has not been idle
>          * recently, as the reduction is likely to be premature then.
> @@ -376,7 +373,6 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
>                                      unsigned int flags)
>  {
>         struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
> -       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
>         unsigned long prev_util = sg_cpu->util;
>
>         /*
> @@ -403,8 +399,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
>                 sg_cpu->util = prev_util;
>
>         cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
> -                                  map_util_perf(sg_cpu->util),
> -                                  sg_policy->max);
> +                                  map_util_perf(sg_cpu->util), sg_cpu->max);
>
>         sg_cpu->sg_policy->last_freq_update_time = time;
>  }
> @@ -413,19 +408,25 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
>  {
>         struct sugov_policy *sg_policy = sg_cpu->sg_policy;
>         struct cpufreq_policy *policy = sg_policy->policy;
> -       unsigned long util = 0;
> +       unsigned long util = 0, max = 1;
>         unsigned int j;
>
>         for_each_cpu(j, policy->cpus) {
>                 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
> +               unsigned long j_util, j_max;
>
>                 sugov_get_util(j_sg_cpu);
>                 sugov_iowait_apply(j_sg_cpu, time);
> +               j_util = j_sg_cpu->util;
> +               j_max = j_sg_cpu->max;
>
> -               util = max(j_sg_cpu->util, util);
> +               if (j_util * max > j_max * util) {
> +                       util = j_util;
> +                       max = j_max;
> +               }
>         }
>
> -       return get_next_freq(sg_policy, util, sg_policy->max);
> +       return get_next_freq(sg_policy, util, max);
>  }
>
>  static void
> @@ -751,7 +752,7 @@ static int sugov_start(struct cpufreq_policy *policy)
>  {
>         struct sugov_policy *sg_policy = policy->governor_data;
>         void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
> -       unsigned int cpu = cpumask_first(policy->cpus);
> +       unsigned int cpu;
>
>         sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
>         sg_policy->last_freq_update_time        = 0;
> @@ -759,7 +760,6 @@ static int sugov_start(struct cpufreq_policy *policy)
>         sg_policy->work_in_progress             = false;
>         sg_policy->limits_changed               = false;
>         sg_policy->cached_raw_freq              = 0;
> -       sg_policy->max                          = arch_scale_cpu_capacity(cpu);
>
>         sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
>
> --
> 2.38.1.431.g37b22c650d-goog
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-10 19:57 [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Sam Wu
  2022-11-15 22:35 ` Saravana Kannan
@ 2022-11-16 11:35 ` Lukasz Luba
  2022-11-20 18:07 ` [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" #forregzbot Thorsten Leemhuis
  2022-12-05  9:18 ` [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Viresh Kumar
  3 siblings, 0 replies; 16+ messages in thread
From: Lukasz Luba @ 2022-11-16 11:35 UTC (permalink / raw)
  To: Sam Wu
  Cc: Saravana Kannan, Ingo Molnar, Dietmar Eggemann,
	Isaac J . Manjarres, kernel-team, Mel Gorman, Juri Lelli,
	Ben Segall, Steven Rostedt, Vincent Guittot, Valentin Schneider,
	Peter Zijlstra, Rafael J. Wysocki, linux-pm, linux-kernel,
	Rafael J. Wysocki, Daniel Bristot de Oliveira, Viresh Kumar

Hi Sam,

On 11/10/22 19:57, Sam Wu wrote:
> This reverts commit 6d5afdc97ea71958287364a1f1d07e59ef151b11.
> 
> On a Pixel 6 device, it is observed that this commit increases
> latency by approximately 50ms, or 20%, in migrating a task
> that requires full CPU utilization from a LITTLE CPU to Fmax
> on a big CPU. Reverting this change restores the latency back
> to its original baseline value.

Which mainline kernel version you use in pixel6?

Could you elaborate a bit how is it possible?
Do you have the sg_policy setup properly (and at right time)?
Do you have the cpu capacity from arch_scale_cpu_capacity()
set correctly and at the right time during this cpufreq
governor setup?

IIRC in Android there is a different code for setting up the
cpufreq sched governor clones. In mainline we don't have to do
those tricks, so this might be the main difference.

Could you trace the value that is read from
arch_scale_cpu_capacity() and share it with us?
I suspect this value changes in time in your kernel.

Regards,
Lukasz

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-15 22:35 ` Saravana Kannan
@ 2022-11-16 11:43   ` Lukasz Luba
  2022-11-16 12:17     ` Rafael J. Wysocki
  2022-11-18  1:00     ` Sam Wu
  0 siblings, 2 replies; 16+ messages in thread
From: Lukasz Luba @ 2022-11-16 11:43 UTC (permalink / raw)
  To: Saravana Kannan, Sam Wu
  Cc: Rafael J. Wysocki, Viresh Kumar, Ingo Molnar, Peter Zijlstra,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Valentin Schneider, Isaac J . Manjarres, kernel-team,
	Rafael J. Wysocki, linux-pm, linux-kernel



On 11/15/22 22:35, Saravana Kannan wrote:
> On Thu, Nov 10, 2022 at 11:57 AM Sam Wu <wusamuel@google.com> wrote:
>>
>> This reverts commit 6d5afdc97ea71958287364a1f1d07e59ef151b11.
>>
>> On a Pixel 6 device, it is observed that this commit increases
>> latency by approximately 50ms, or 20%, in migrating a task
>> that requires full CPU utilization from a LITTLE CPU to Fmax
>> on a big CPU. Reverting this change restores the latency back
>> to its original baseline value.
>>
>> Fixes: 6d5afdc97ea7 ("cpufreq: schedutil: Move max CPU capacity to sugov_policy")
>> Cc: Lukasz Luba <lukasz.luba@arm.com>
>> Cc: Saravana Kannan <saravanak@google.com>
>> Cc: Isaac J. Manjarres <isaacmanjarres@google.com>
>> Signed-off-by: Sam Wu <wusamuel@google.com>
> 
> Rafael, can we pick this up please?
> 
> Lukasz, no objections to the idea itself, but it's causing regression
> and we'd like to revert and then fix it.

If you see this in mainline kernel, then I'm fine with reverting it.
Then I will have to trace why this CPU capacity value can change over
time in mainline kernel (while it shouldn't, because we register the
cpufreq policy and the governor later, after we calculate the capacity
in arch_topology.c). Maybe something has changed in mainline in the
meantime in this CPU capacity setup code, which caused this side effect.

I know that android-mainline has some different setup code for those
custom vendor governors. I just want to eliminate this bit and be on the
same page.

Regards,
Lukasz

> 
> -Saravana
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-16 11:43   ` Lukasz Luba
@ 2022-11-16 12:17     ` Rafael J. Wysocki
  2022-11-18  1:00     ` Sam Wu
  1 sibling, 0 replies; 16+ messages in thread
From: Rafael J. Wysocki @ 2022-11-16 12:17 UTC (permalink / raw)
  To: Lukasz Luba
  Cc: Saravana Kannan, Sam Wu, Rafael J. Wysocki, Viresh Kumar,
	Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Valentin Schneider,
	Isaac J . Manjarres, kernel-team, Rafael J. Wysocki, linux-pm,
	linux-kernel

On Wed, Nov 16, 2022 at 12:43 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 11/15/22 22:35, Saravana Kannan wrote:
> > On Thu, Nov 10, 2022 at 11:57 AM Sam Wu <wusamuel@google.com> wrote:
> >>
> >> This reverts commit 6d5afdc97ea71958287364a1f1d07e59ef151b11.
> >>
> >> On a Pixel 6 device, it is observed that this commit increases
> >> latency by approximately 50ms, or 20%, in migrating a task
> >> that requires full CPU utilization from a LITTLE CPU to Fmax
> >> on a big CPU. Reverting this change restores the latency back
> >> to its original baseline value.
> >>
> >> Fixes: 6d5afdc97ea7 ("cpufreq: schedutil: Move max CPU capacity to sugov_policy")
> >> Cc: Lukasz Luba <lukasz.luba@arm.com>
> >> Cc: Saravana Kannan <saravanak@google.com>
> >> Cc: Isaac J. Manjarres <isaacmanjarres@google.com>
> >> Signed-off-by: Sam Wu <wusamuel@google.com>
> >
> > Rafael, can we pick this up please?
> >
> > Lukasz, no objections to the idea itself, but it's causing regression
> > and we'd like to revert and then fix it.
>
> If you see this in mainline kernel, then I'm fine with reverting it.

OK, I'll wait for the confirmation of this.

> Then I will have to trace why this CPU capacity value can change over
> time in mainline kernel (while it shouldn't, because we register the
> cpufreq policy and the governor later, after we calculate the capacity
> in arch_topology.c). Maybe something has changed in mainline in the
> meantime in this CPU capacity setup code, which caused this side effect.
>
> I know that android-mainline has some different setup code for those
> custom vendor governors. I just want to eliminate this bit and be on the
> same page.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-16 11:43   ` Lukasz Luba
  2022-11-16 12:17     ` Rafael J. Wysocki
@ 2022-11-18  1:00     ` Sam Wu
  2022-11-21 19:18       ` Rafael J. Wysocki
  1 sibling, 1 reply; 16+ messages in thread
From: Sam Wu @ 2022-11-18  1:00 UTC (permalink / raw)
  To: Lukasz Luba
  Cc: Saravana Kannan, Rafael J. Wysocki, Viresh Kumar, Ingo Molnar,
	Peter Zijlstra, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Valentin Schneider,
	Isaac J . Manjarres, kernel-team, Rafael J. Wysocki, linux-pm,
	linux-kernel

On Wed, Nov 16, 2022 at 3:35 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
> Which mainline kernel version you use in pixel6?
I am using kernel version 6.1-rc5.
>
> Could you elaborate a bit how is it possible?
> Do you have the sg_policy setup properly (and at right time)?
> Do you have the cpu capacity from arch_scale_cpu_capacity()
> set correctly and at the right time during this cpufreq
> governor setup?
>
> IIRC in Android there is a different code for setting up the
> cpufreq sched governor clones. In mainline we don't have to do
> those tricks, so this might be the main difference.
This behavior is seen on the mainline kernel. There isn't any vendor code
modifying the behavior, and the schedutil governor is being used.
>
> Could you trace the value that is read from
> arch_scale_cpu_capacity() and share it with us?
> I suspect this value changes in time in your kernel.
There's an additional CPU capacity normalization step during
init_cpu_capacity_callback() that does not happen until all the CPUs come
online. However, the sugov_start() function can be called for a subset of
CPUs before all the CPUs are brought up and before the normalization of
the CPU capacity values, so there could be a stale value stored
in sugov_policy.max field.

Best,
Sam

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" #forregzbot
  2022-11-10 19:57 [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Sam Wu
  2022-11-15 22:35 ` Saravana Kannan
  2022-11-16 11:35 ` Lukasz Luba
@ 2022-11-20 18:07 ` Thorsten Leemhuis
  2022-11-27 12:06   ` Thorsten Leemhuis
  2022-12-05  9:18 ` [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Viresh Kumar
  3 siblings, 1 reply; 16+ messages in thread
From: Thorsten Leemhuis @ 2022-11-20 18:07 UTC (permalink / raw)
  To: regressions; +Cc: linux-pm, linux-kernel

[Note: this mail is primarily send for documentation purposes and/or for
regzbot, my Linux kernel regression tracking bot. That's why I removed
most or all folks from the list of recipients, but left any that looked
like a mailing lists. These mails usually contain '#forregzbot' in the
subject, to make them easy to spot and filter out.]

[TLDR: I'm adding this regression report to the list of tracked
regressions; all text from me you find below is based on a few templates
paragraphs you might have encountered already already in similar form.]

Hi, this is your Linux kernel regression tracker.

On 10.11.22 20:57, Sam Wu wrote:
> This reverts commit 6d5afdc97ea71958287364a1f1d07e59ef151b11.
> 
> On a Pixel 6 device, it is observed that this commit increases
> latency by approximately 50ms, or 20%, in migrating a task
> that requires full CPU utilization from a LITTLE CPU to Fmax
> on a big CPU. Reverting this change restores the latency back
> to its original baseline value.
> 
> Fixes: 6d5afdc97ea7 ("cpufreq: schedutil: Move max CPU capacity to sugov_policy")
> Cc: Lukasz Luba <lukasz.luba@arm.com>
> Cc: Saravana Kannan <saravanak@google.com>
> Cc: Isaac J. Manjarres <isaacmanjarres@google.com>
> Signed-off-by: Sam Wu <wusamuel@google.com>

Thanks for the report. To be sure below issue doesn't fall through the
cracks unnoticed, I'm adding it to regzbot, my Linux kernel regression
tracking bot:

#regzbot ^introduced 6d5afdc97ea7
#regzbot title cpufreq: schedutil: improved latency on Pixel 6
#regzbot ignore-activity

This isn't a regression? This issue or a fix for it are already
discussed somewhere else? It was fixed already? You want to clarify when
the regression started to happen? Or point out I got the title or
something else totally wrong? Then just reply -- ideally with also
telling regzbot about it, as explained here:
https://linux-regtracking.leemhuis.info/tracked-regression/

Reminder for developers: When fixing the issue, add 'Link:' tags
pointing to the report (the mail this one replies to), as explained for
in the Linux kernel's documentation; above webpage explains why this is
important for tracked regressions.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)

P.S.: As the Linux kernel's regression tracker I deal with a lot of
reports and sometimes miss something important when writing mails like
this. If that's the case here, don't hesitate to tell me in a public
reply, it's in everyone's interest to set the public record straight.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-18  1:00     ` Sam Wu
@ 2022-11-21 19:18       ` Rafael J. Wysocki
  2022-11-22  8:58         ` Lukasz Luba
  0 siblings, 1 reply; 16+ messages in thread
From: Rafael J. Wysocki @ 2022-11-21 19:18 UTC (permalink / raw)
  To: Sam Wu
  Cc: Lukasz Luba, Saravana Kannan, Rafael J. Wysocki, Viresh Kumar,
	Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Valentin Schneider,
	Isaac J . Manjarres, kernel-team, Rafael J. Wysocki, linux-pm,
	linux-kernel

On Fri, Nov 18, 2022 at 2:00 AM Sam Wu <wusamuel@google.com> wrote:
>
> On Wed, Nov 16, 2022 at 3:35 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
> > Which mainline kernel version you use in pixel6?
> I am using kernel version 6.1-rc5.
> >
> > Could you elaborate a bit how is it possible?
> > Do you have the sg_policy setup properly (and at right time)?
> > Do you have the cpu capacity from arch_scale_cpu_capacity()
> > set correctly and at the right time during this cpufreq
> > governor setup?
> >
> > IIRC in Android there is a different code for setting up the
> > cpufreq sched governor clones. In mainline we don't have to do
> > those tricks, so this might be the main difference.
> This behavior is seen on the mainline kernel. There isn't any vendor code
> modifying the behavior, and the schedutil governor is being used.
> >
> > Could you trace the value that is read from
> > arch_scale_cpu_capacity() and share it with us?
> > I suspect this value changes in time in your kernel.
> There's an additional CPU capacity normalization step during
> init_cpu_capacity_callback() that does not happen until all the CPUs come
> online. However, the sugov_start() function can be called for a subset of
> CPUs before all the CPUs are brought up and before the normalization of
> the CPU capacity values, so there could be a stale value stored
> in sugov_policy.max field.

OK, the revert has been applied as 6.1-rc material, thanks!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-21 19:18       ` Rafael J. Wysocki
@ 2022-11-22  8:58         ` Lukasz Luba
  2022-11-30 10:42           ` Vincent Guittot
  0 siblings, 1 reply; 16+ messages in thread
From: Lukasz Luba @ 2022-11-22  8:58 UTC (permalink / raw)
  To: Rafael J. Wysocki, Sam Wu
  Cc: Saravana Kannan, Viresh Kumar, Ingo Molnar, Peter Zijlstra,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Valentin Schneider, Isaac J . Manjarres, kernel-team,
	Rafael J. Wysocki, linux-pm, linux-kernel

Hi Rafael and Sam

On 11/21/22 19:18, Rafael J. Wysocki wrote:
> On Fri, Nov 18, 2022 at 2:00 AM Sam Wu <wusamuel@google.com> wrote:
>>
>> On Wed, Nov 16, 2022 at 3:35 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
>>> Which mainline kernel version you use in pixel6?
>> I am using kernel version 6.1-rc5.
>>>
>>> Could you elaborate a bit how is it possible?
>>> Do you have the sg_policy setup properly (and at right time)?
>>> Do you have the cpu capacity from arch_scale_cpu_capacity()
>>> set correctly and at the right time during this cpufreq
>>> governor setup?
>>>
>>> IIRC in Android there is a different code for setting up the
>>> cpufreq sched governor clones. In mainline we don't have to do
>>> those tricks, so this might be the main difference.
>> This behavior is seen on the mainline kernel. There isn't any vendor code
>> modifying the behavior, and the schedutil governor is being used.
>>>
>>> Could you trace the value that is read from
>>> arch_scale_cpu_capacity() and share it with us?
>>> I suspect this value changes in time in your kernel.
>> There's an additional CPU capacity normalization step during
>> init_cpu_capacity_callback() that does not happen until all the CPUs come
>> online. However, the sugov_start() function can be called for a subset of
>> CPUs before all the CPUs are brought up and before the normalization of
>> the CPU capacity values, so there could be a stale value stored
>> in sugov_policy.max field.
> 
> OK, the revert has been applied as 6.1-rc material, thanks!

I was on a business trip last week so couldn't check this.
Now I'm back and I've checked the booting sequence.
Yes, there is some race condition and the mechanism
using blocking_notifier_call_chain() in the cpufreq_online()
doesn't help while we are registering that schedutil
new policy.

I will have to go through those mechanisms and check them.
I agree, for now the best option is to revert the patch.

My apologies for introducing this issues.
Thanks Sam for capturing it.

Regards,
Lukasz

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" #forregzbot
  2022-11-20 18:07 ` [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" #forregzbot Thorsten Leemhuis
@ 2022-11-27 12:06   ` Thorsten Leemhuis
  0 siblings, 0 replies; 16+ messages in thread
From: Thorsten Leemhuis @ 2022-11-27 12:06 UTC (permalink / raw)
  To: regressions; +Cc: linux-pm, linux-kernel

On 20.11.22 19:07, Thorsten Leemhuis wrote:
> On 10.11.22 20:57, Sam Wu wrote:
>> This reverts commit 6d5afdc97ea71958287364a1f1d07e59ef151b11.
>>
>> On a Pixel 6 device, it is observed that this commit increases
>> latency by approximately 50ms, or 20%, in migrating a task
>> that requires full CPU utilization from a LITTLE CPU to Fmax
>> on a big CPU. Reverting this change restores the latency back
>> to its original baseline value.
>>
>> Fixes: 6d5afdc97ea7 ("cpufreq: schedutil: Move max CPU capacity to sugov_policy")
>> Cc: Lukasz Luba <lukasz.luba@arm.com>
>> Cc: Saravana Kannan <saravanak@google.com>
>> Cc: Isaac J. Manjarres <isaacmanjarres@google.com>
>> Signed-off-by: Sam Wu <wusamuel@google.com>
> 
> Thanks for the report. To be sure below issue doesn't fall through the
> cracks unnoticed, I'm adding it to regzbot, my Linux kernel regression
> tracking bot:
> 
> #regzbot ^introduced 6d5afdc97ea7
> #regzbot title cpufreq: schedutil: improved latency on Pixel 6
> #regzbot ignore-activity

#regzbot fixed-by: cdcc5ef26b3

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-22  8:58         ` Lukasz Luba
@ 2022-11-30 10:42           ` Vincent Guittot
  2022-11-30 14:04             ` Lukasz Luba
  0 siblings, 1 reply; 16+ messages in thread
From: Vincent Guittot @ 2022-11-30 10:42 UTC (permalink / raw)
  To: Lukasz Luba
  Cc: Rafael J. Wysocki, Sam Wu, Saravana Kannan, Viresh Kumar,
	Ingo Molnar, Peter Zijlstra, Juri Lelli, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Valentin Schneider,
	Isaac J . Manjarres, kernel-team, Rafael J. Wysocki, linux-pm,
	linux-kernel

Hi All

Just for the log and because it took me a while to figure out the root
cause of the problem: This patch also creates a regression for
snapdragon845 based systems and probably on any QC chipsets that use a
LUT to update the OPP table at boot. The behavior is the same as
described by Sam with a staled value in sugov_policy.max field.

Regards,
Vincent

On Tue, 22 Nov 2022 at 09:58, Lukasz Luba <lukasz.luba@arm.com> wrote:
>
> Hi Rafael and Sam
>
> On 11/21/22 19:18, Rafael J. Wysocki wrote:
> > On Fri, Nov 18, 2022 at 2:00 AM Sam Wu <wusamuel@google.com> wrote:
> >>
> >> On Wed, Nov 16, 2022 at 3:35 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>> Which mainline kernel version you use in pixel6?
> >> I am using kernel version 6.1-rc5.
> >>>
> >>> Could you elaborate a bit how is it possible?
> >>> Do you have the sg_policy setup properly (and at right time)?
> >>> Do you have the cpu capacity from arch_scale_cpu_capacity()
> >>> set correctly and at the right time during this cpufreq
> >>> governor setup?
> >>>
> >>> IIRC in Android there is a different code for setting up the
> >>> cpufreq sched governor clones. In mainline we don't have to do
> >>> those tricks, so this might be the main difference.
> >> This behavior is seen on the mainline kernel. There isn't any vendor code
> >> modifying the behavior, and the schedutil governor is being used.
> >>>
> >>> Could you trace the value that is read from
> >>> arch_scale_cpu_capacity() and share it with us?
> >>> I suspect this value changes in time in your kernel.
> >> There's an additional CPU capacity normalization step during
> >> init_cpu_capacity_callback() that does not happen until all the CPUs come
> >> online. However, the sugov_start() function can be called for a subset of
> >> CPUs before all the CPUs are brought up and before the normalization of
> >> the CPU capacity values, so there could be a stale value stored
> >> in sugov_policy.max field.
> >
> > OK, the revert has been applied as 6.1-rc material, thanks!
>
> I was on a business trip last week so couldn't check this.
> Now I'm back and I've checked the booting sequence.
> Yes, there is some race condition and the mechanism
> using blocking_notifier_call_chain() in the cpufreq_online()
> doesn't help while we are registering that schedutil
> new policy.
>
> I will have to go through those mechanisms and check them.
> I agree, for now the best option is to revert the patch.
>
> My apologies for introducing this issues.
> Thanks Sam for capturing it.
>
> Regards,
> Lukasz

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-30 10:42           ` Vincent Guittot
@ 2022-11-30 14:04             ` Lukasz Luba
  2022-11-30 14:29               ` Vincent Guittot
  0 siblings, 1 reply; 16+ messages in thread
From: Lukasz Luba @ 2022-11-30 14:04 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Rafael J. Wysocki, Sam Wu, Saravana Kannan, Viresh Kumar,
	Ingo Molnar, Peter Zijlstra, Juri Lelli, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Valentin Schneider,
	Isaac J . Manjarres, kernel-team, Rafael J. Wysocki, linux-pm,
	linux-kernel

Hi Vincent,

On 11/30/22 10:42, Vincent Guittot wrote:
> Hi All
> 
> Just for the log and because it took me a while to figure out the root
> cause of the problem: This patch also creates a regression for
> snapdragon845 based systems and probably on any QC chipsets that use a
> LUT to update the OPP table at boot. The behavior is the same as
> described by Sam with a staled value in sugov_policy.max field.

Thanks for sharing this info and apologies that you spent cycles
on it.

I have checked that whole setup code (capacity + cpufreq policy and
governor). It looks like to have a proper capacity of CPUs, we need
to wait till the last policy is created. It's due to the arch_topology.c
mechanism which is only triggered after all CPUs' got the policy.
Unfortunately, this leads to a chicken & egg situation for this
schedutil setup of max capacity.

I have experimented with this code, which triggers an update in
the schedutil, when all CPUs got the policy and sugov gov
(with trace_printk() to mach the output below)

-------------------------8<-----------------------------------------
diff --git a/kernel/sched/cpufreq_schedutil.c 
b/kernel/sched/cpufreq_schedutil.c
index 9161d1136d01..f1913a857218 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -59,6 +59,7 @@ struct sugov_cpu {
  };

  static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
+static cpumask_var_t cpus_to_visit;

  /************************ Governor internals ***********************/

@@ -783,6 +784,22 @@ static int sugov_start(struct cpufreq_policy *policy)

                 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, 
uu);
         }
+
+       cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
+
+       if (cpumask_empty(cpus_to_visit)) {
+               trace_printk("schedutil the visit cpu mask is empty now\n");
+               for_each_possible_cpu(cpu) {
+                       struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+                       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+
+                       sg_policy->max = arch_scale_cpu_capacity(cpu);
+
+                       trace_printk("SCHEDUTIL: NEW  CPU%u 
cpu_capacity=%lu\n",
+                               cpu, sg_policy->max);
+               }
+       }
+
         return 0;
  }

@@ -800,6 +817,8 @@ static void sugov_stop(struct cpufreq_policy *policy)
                 irq_work_sync(&sg_policy->irq_work);
                 kthread_cancel_work_sync(&sg_policy->work);
         }
+
+       cpumask_or(cpus_to_visit, cpus_to_visit, policy->related_cpus);
  }

  static void sugov_limits(struct cpufreq_policy *policy)
@@ -829,6 +848,11 @@ struct cpufreq_governor schedutil_gov = {
  #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
  struct cpufreq_governor *cpufreq_default_governor(void)
  {
+       if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
+               return NULL;
+
+       cpumask_copy(cpus_to_visit, cpu_possible_mask);
+
         return &schedutil_gov;
  }
  #endif

---------------------------------->8---------------------------------


That simple approach fixes the issue. I have also tested it with
governor change a few times and setting back the schedutil.

-------------------------------------------
    kworker/u12:1-48      [004] .....     2.208847: sugov_start: 
schedutil the visit cpu mask is empty now
    kworker/u12:1-48      [004] .....     2.208854: sugov_start: 
SCHEDUTIL: NEW  CPU0 cpu_capacity=381
    kworker/u12:1-48      [004] .....     2.208857: sugov_start: 
SCHEDUTIL: NEW  CPU1 cpu_capacity=381
    kworker/u12:1-48      [004] .....     2.208860: sugov_start: 
SCHEDUTIL: NEW  CPU2 cpu_capacity=381
    kworker/u12:1-48      [004] .....     2.208862: sugov_start: 
SCHEDUTIL: NEW  CPU3 cpu_capacity=381
    kworker/u12:1-48      [004] .....     2.208864: sugov_start: 
SCHEDUTIL: NEW  CPU4 cpu_capacity=1024
    kworker/u12:1-48      [004] .....     2.208866: sugov_start: 
SCHEDUTIL: NEW  CPU5 cpu_capacity=1024
             bash-615     [005] .....    35.317113: sugov_start: 
schedutil the visit cpu mask is empty now
             bash-615     [005] .....    35.317120: sugov_start: 
SCHEDUTIL: NEW  CPU0 cpu_capacity=381
             bash-615     [005] .....    35.317123: sugov_start: 
SCHEDUTIL: NEW  CPU1 cpu_capacity=381
             bash-615     [005] .....    35.317125: sugov_start: 
SCHEDUTIL: NEW  CPU2 cpu_capacity=381
             bash-615     [005] .....    35.317127: sugov_start: 
SCHEDUTIL: NEW  CPU3 cpu_capacity=381
             bash-615     [005] .....    35.317129: sugov_start: 
SCHEDUTIL: NEW  CPU4 cpu_capacity=1024
             bash-615     [005] .....    35.317131: sugov_start: 
SCHEDUTIL: NEW  CPU5 cpu_capacity=1024
             bash-623     [003] .....    57.633328: sugov_start: 
schedutil the visit cpu mask is empty now
             bash-623     [003] .....    57.633336: sugov_start: 
SCHEDUTIL: NEW  CPU0 cpu_capacity=381
             bash-623     [003] .....    57.633339: sugov_start: 
SCHEDUTIL: NEW  CPU1 cpu_capacity=381
             bash-623     [003] .....    57.633340: sugov_start: 
SCHEDUTIL: NEW  CPU2 cpu_capacity=381
             bash-623     [003] .....    57.633342: sugov_start: 
SCHEDUTIL: NEW  CPU3 cpu_capacity=381
             bash-623     [003] .....    57.633343: sugov_start: 
SCHEDUTIL: NEW  CPU4 cpu_capacity=1024
             bash-623     [003] .....    57.633344: sugov_start: 
SCHEDUTIL: NEW  CPU5 cpu_capacity=1024
----------------------------------------------------

It should work.

Regards,
Lukasz

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-30 14:04             ` Lukasz Luba
@ 2022-11-30 14:29               ` Vincent Guittot
  2022-11-30 15:00                 ` Lukasz Luba
  0 siblings, 1 reply; 16+ messages in thread
From: Vincent Guittot @ 2022-11-30 14:29 UTC (permalink / raw)
  To: Lukasz Luba
  Cc: Rafael J. Wysocki, Sam Wu, Saravana Kannan, Viresh Kumar,
	Ingo Molnar, Peter Zijlstra, Juri Lelli, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Valentin Schneider,
	Isaac J . Manjarres, kernel-team, Rafael J. Wysocki, linux-pm,
	linux-kernel

On Wed, 30 Nov 2022 at 15:04, Lukasz Luba <lukasz.luba@arm.com> wrote:
>
> Hi Vincent,
>
> On 11/30/22 10:42, Vincent Guittot wrote:
> > Hi All
> >
> > Just for the log and because it took me a while to figure out the root
> > cause of the problem: This patch also creates a regression for
> > snapdragon845 based systems and probably on any QC chipsets that use a
> > LUT to update the OPP table at boot. The behavior is the same as
> > described by Sam with a staled value in sugov_policy.max field.
>
> Thanks for sharing this info and apologies that you spent cycles
> on it.
>
> I have checked that whole setup code (capacity + cpufreq policy and
> governor). It looks like to have a proper capacity of CPUs, we need
> to wait till the last policy is created. It's due to the arch_topology.c
> mechanism which is only triggered after all CPUs' got the policy.
> Unfortunately, this leads to a chicken & egg situation for this
> schedutil setup of max capacity.
>
> I have experimented with this code, which triggers an update in
> the schedutil, when all CPUs got the policy and sugov gov
> (with trace_printk() to mach the output below)

Your proposal below looks similar to what is done in arch_topology.c.
arch_topology.c triggers a rebuild of sched_domain and removes its
cpufreq notifier cb once it has visited all CPUs, could it also
trigger an update of CPU's policy with cpufreq_update_policy() ?

At this point you will be sure that the normalization has happened and
the max capacity will not change.

I don't know if it's a global problem or only for systems using arch_topology

>
> -------------------------8<-----------------------------------------
> diff --git a/kernel/sched/cpufreq_schedutil.c
> b/kernel/sched/cpufreq_schedutil.c
> index 9161d1136d01..f1913a857218 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -59,6 +59,7 @@ struct sugov_cpu {
>   };
>
>   static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
> +static cpumask_var_t cpus_to_visit;
>
>   /************************ Governor internals ***********************/
>
> @@ -783,6 +784,22 @@ static int sugov_start(struct cpufreq_policy *policy)
>
>                  cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
> uu);
>          }
> +
> +       cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
> +
> +       if (cpumask_empty(cpus_to_visit)) {
> +               trace_printk("schedutil the visit cpu mask is empty now\n");
> +               for_each_possible_cpu(cpu) {
> +                       struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
> +                       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
> +
> +                       sg_policy->max = arch_scale_cpu_capacity(cpu);
> +
> +                       trace_printk("SCHEDUTIL: NEW  CPU%u
> cpu_capacity=%lu\n",
> +                               cpu, sg_policy->max);
> +               }
> +       }
> +
>          return 0;
>   }
>
> @@ -800,6 +817,8 @@ static void sugov_stop(struct cpufreq_policy *policy)
>                  irq_work_sync(&sg_policy->irq_work);
>                  kthread_cancel_work_sync(&sg_policy->work);
>          }
> +
> +       cpumask_or(cpus_to_visit, cpus_to_visit, policy->related_cpus);
>   }
>
>   static void sugov_limits(struct cpufreq_policy *policy)
> @@ -829,6 +848,11 @@ struct cpufreq_governor schedutil_gov = {
>   #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
>   struct cpufreq_governor *cpufreq_default_governor(void)
>   {
> +       if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
> +               return NULL;
> +
> +       cpumask_copy(cpus_to_visit, cpu_possible_mask);
> +
>          return &schedutil_gov;
>   }
>   #endif
>
> ---------------------------------->8---------------------------------
>
>
> That simple approach fixes the issue. I have also tested it with
> governor change a few times and setting back the schedutil.
>
> -------------------------------------------
>     kworker/u12:1-48      [004] .....     2.208847: sugov_start:
> schedutil the visit cpu mask is empty now
>     kworker/u12:1-48      [004] .....     2.208854: sugov_start:
> SCHEDUTIL: NEW  CPU0 cpu_capacity=381
>     kworker/u12:1-48      [004] .....     2.208857: sugov_start:
> SCHEDUTIL: NEW  CPU1 cpu_capacity=381
>     kworker/u12:1-48      [004] .....     2.208860: sugov_start:
> SCHEDUTIL: NEW  CPU2 cpu_capacity=381
>     kworker/u12:1-48      [004] .....     2.208862: sugov_start:
> SCHEDUTIL: NEW  CPU3 cpu_capacity=381
>     kworker/u12:1-48      [004] .....     2.208864: sugov_start:
> SCHEDUTIL: NEW  CPU4 cpu_capacity=1024
>     kworker/u12:1-48      [004] .....     2.208866: sugov_start:
> SCHEDUTIL: NEW  CPU5 cpu_capacity=1024
>              bash-615     [005] .....    35.317113: sugov_start:
> schedutil the visit cpu mask is empty now
>              bash-615     [005] .....    35.317120: sugov_start:
> SCHEDUTIL: NEW  CPU0 cpu_capacity=381
>              bash-615     [005] .....    35.317123: sugov_start:
> SCHEDUTIL: NEW  CPU1 cpu_capacity=381
>              bash-615     [005] .....    35.317125: sugov_start:
> SCHEDUTIL: NEW  CPU2 cpu_capacity=381
>              bash-615     [005] .....    35.317127: sugov_start:
> SCHEDUTIL: NEW  CPU3 cpu_capacity=381
>              bash-615     [005] .....    35.317129: sugov_start:
> SCHEDUTIL: NEW  CPU4 cpu_capacity=1024
>              bash-615     [005] .....    35.317131: sugov_start:
> SCHEDUTIL: NEW  CPU5 cpu_capacity=1024
>              bash-623     [003] .....    57.633328: sugov_start:
> schedutil the visit cpu mask is empty now
>              bash-623     [003] .....    57.633336: sugov_start:
> SCHEDUTIL: NEW  CPU0 cpu_capacity=381
>              bash-623     [003] .....    57.633339: sugov_start:
> SCHEDUTIL: NEW  CPU1 cpu_capacity=381
>              bash-623     [003] .....    57.633340: sugov_start:
> SCHEDUTIL: NEW  CPU2 cpu_capacity=381
>              bash-623     [003] .....    57.633342: sugov_start:
> SCHEDUTIL: NEW  CPU3 cpu_capacity=381
>              bash-623     [003] .....    57.633343: sugov_start:
> SCHEDUTIL: NEW  CPU4 cpu_capacity=1024
>              bash-623     [003] .....    57.633344: sugov_start:
> SCHEDUTIL: NEW  CPU5 cpu_capacity=1024
> ----------------------------------------------------
>
> It should work.
>
> Regards,
> Lukasz

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-30 14:29               ` Vincent Guittot
@ 2022-11-30 15:00                 ` Lukasz Luba
  0 siblings, 0 replies; 16+ messages in thread
From: Lukasz Luba @ 2022-11-30 15:00 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Rafael J. Wysocki, Sam Wu, Saravana Kannan, Viresh Kumar,
	Ingo Molnar, Peter Zijlstra, Juri Lelli, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Valentin Schneider,
	Isaac J . Manjarres, kernel-team, Rafael J. Wysocki, linux-pm,
	linux-kernel



On 11/30/22 14:29, Vincent Guittot wrote:
> On Wed, 30 Nov 2022 at 15:04, Lukasz Luba <lukasz.luba@arm.com> wrote:
>>
>> Hi Vincent,
>>
>> On 11/30/22 10:42, Vincent Guittot wrote:
>>> Hi All
>>>
>>> Just for the log and because it took me a while to figure out the root
>>> cause of the problem: This patch also creates a regression for
>>> snapdragon845 based systems and probably on any QC chipsets that use a
>>> LUT to update the OPP table at boot. The behavior is the same as
>>> described by Sam with a staled value in sugov_policy.max field.
>>
>> Thanks for sharing this info and apologies that you spent cycles
>> on it.
>>
>> I have checked that whole setup code (capacity + cpufreq policy and
>> governor). It looks like to have a proper capacity of CPUs, we need
>> to wait till the last policy is created. It's due to the arch_topology.c
>> mechanism which is only triggered after all CPUs' got the policy.
>> Unfortunately, this leads to a chicken & egg situation for this
>> schedutil setup of max capacity.
>>
>> I have experimented with this code, which triggers an update in
>> the schedutil, when all CPUs got the policy and sugov gov
>> (with trace_printk() to mach the output below)
> 
> Your proposal below looks similar to what is done in arch_topology.c.

Yes, even the name 'cpus_to_visit' looks similar ;)

> arch_topology.c triggers a rebuild of sched_domain and removes its
> cpufreq notifier cb once it has visited all CPUs, could it also
> trigger an update of CPU's policy with cpufreq_update_policy() ?
> 
> At this point you will be sure that the normalization has happened and
> the max capacity will not change.

True, they are done under that blocking notification chain, for the
last policy init. This is before the last time we call the
schedutil sugov_start with that last policy. That's why this code
is able to see that properly normalized max capacity under the:
trace_printk("schedutil the visit cpu mask is empty now\n");


> 
> I don't know if it's a global problem or only for systems using arch_topology
> 

It would only be for those with arch_topology, so only our asymmetric
systems AFAICS.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-11-10 19:57 [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Sam Wu
                   ` (2 preceding siblings ...)
  2022-11-20 18:07 ` [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" #forregzbot Thorsten Leemhuis
@ 2022-12-05  9:18 ` Viresh Kumar
  2022-12-06  8:17   ` Lukasz Luba
  3 siblings, 1 reply; 16+ messages in thread
From: Viresh Kumar @ 2022-12-05  9:18 UTC (permalink / raw)
  To: Lukasz Luba
  Cc: Rafael J. Wysocki, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Sam Wu, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Valentin Schneider, Saravana Kannan, Isaac J . Manjarres,
	kernel-team, Rafael J. Wysocki, linux-pm, linux-kernel

Lukasz,

On 10-11-22, 19:57, Sam Wu wrote:
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 9161d1136d01..1207c78f85c1 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -25,9 +25,6 @@ struct sugov_policy {
>  	unsigned int		next_freq;
>  	unsigned int		cached_raw_freq;
>  
> -	/* max CPU capacity, which is equal for all CPUs in freq. domain */
> -	unsigned long		max;
> -
>  	/* The next fields are only needed if fast switch cannot be used: */
>  	struct			irq_work irq_work;
>  	struct			kthread_work work;
> @@ -51,6 +48,7 @@ struct sugov_cpu {
>  
>  	unsigned long		util;
>  	unsigned long		bw_dl;
> +	unsigned long		max;

IIUC, this part, i.e. moving max to sugov_policy, wasn't the problem
here, right ? Can you send a patch for that at least first, since this
is fully reverted now.

Or it doesn't make sense?

-- 
viresh

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
  2022-12-05  9:18 ` [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Viresh Kumar
@ 2022-12-06  8:17   ` Lukasz Luba
  0 siblings, 0 replies; 16+ messages in thread
From: Lukasz Luba @ 2022-12-06  8:17 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J. Wysocki, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Sam Wu, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Valentin Schneider, Saravana Kannan, Isaac J . Manjarres,
	kernel-team, Rafael J. Wysocki, linux-pm, linux-kernel

Hi Viresh,

On 12/5/22 09:18, Viresh Kumar wrote:
> Lukasz,
> 
> On 10-11-22, 19:57, Sam Wu wrote:
>> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
>> index 9161d1136d01..1207c78f85c1 100644
>> --- a/kernel/sched/cpufreq_schedutil.c
>> +++ b/kernel/sched/cpufreq_schedutil.c
>> @@ -25,9 +25,6 @@ struct sugov_policy {
>>   	unsigned int		next_freq;
>>   	unsigned int		cached_raw_freq;
>>   
>> -	/* max CPU capacity, which is equal for all CPUs in freq. domain */
>> -	unsigned long		max;
>> -
>>   	/* The next fields are only needed if fast switch cannot be used: */
>>   	struct			irq_work irq_work;
>>   	struct			kthread_work work;
>> @@ -51,6 +48,7 @@ struct sugov_cpu {
>>   
>>   	unsigned long		util;
>>   	unsigned long		bw_dl;
>> +	unsigned long		max;
> 
> IIUC, this part, i.e. moving max to sugov_policy, wasn't the problem
> here, right ? Can you send a patch for that at least first, since this
> is fully reverted now.
> 
> Or it doesn't make sense?
> 

Yes, that still could make sense. We could still optimize a bit that
code in the sugov_next_freq_shared(). Look at that function. It loops
over all CPUs in the policy and calls sugov_get_util() which calls
this arch_scale_cpu_capacity() N times. Then it does those
multiplications below:

if (j_util * max > j_max * util)

which will be 2*N mul operations...
IMO this is pointless and heavy for LITTLE cores which are 4 or
sometimes 6 in the policy.

As you could see, my code just left that loop with a simple
max() operation.

I might just attack this code differently. Switch to that
sugov_policy::max, fetch the cpu capacity only once, before that loop.
I will rewrite a bit the sugov_get_util() and adjust the
2nd user of it: sugov_update_single_common()

Regards,
Lukasz

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-12-06  8:19 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-10 19:57 [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Sam Wu
2022-11-15 22:35 ` Saravana Kannan
2022-11-16 11:43   ` Lukasz Luba
2022-11-16 12:17     ` Rafael J. Wysocki
2022-11-18  1:00     ` Sam Wu
2022-11-21 19:18       ` Rafael J. Wysocki
2022-11-22  8:58         ` Lukasz Luba
2022-11-30 10:42           ` Vincent Guittot
2022-11-30 14:04             ` Lukasz Luba
2022-11-30 14:29               ` Vincent Guittot
2022-11-30 15:00                 ` Lukasz Luba
2022-11-16 11:35 ` Lukasz Luba
2022-11-20 18:07 ` [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" #forregzbot Thorsten Leemhuis
2022-11-27 12:06   ` Thorsten Leemhuis
2022-12-05  9:18 ` [PATCH v1] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" Viresh Kumar
2022-12-06  8:17   ` Lukasz Luba

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).