All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sched/uclamp:  Introduce a method to transform UCLAMP_MIN into BOOST
@ 2021-07-21  7:57 Xuewen Yan
  2021-07-23 15:19 ` Dietmar Eggemann
  0 siblings, 1 reply; 8+ messages in thread
From: Xuewen Yan @ 2021-07-21  7:57 UTC (permalink / raw)
  To: dietmar.eggemann, mingo, peterz, juri.lelli, vincent.guittot
  Cc: rostedt, bsegall, mgorman, bristot, mcgrof, keescook, yzaikin,
	pjt, qais.yousef, qperret, linux-kernel, linux-fsdevel

From: Xuewen Yan <xuewen.yan@unisoc.com>

The uclamp can clamp the util within uclamp_min and uclamp_max,
it is benifit to some tasks with small util, but for those tasks
with middle util, it is useless.

To speed up those tasks, convert UCLAMP_MIN to BOOST,
the BOOST as schedtune does:

boot = uclamp_min / SCHED_CAPACITY_SCALE;
margin = boost * (uclamp_max - util)
boost_util = util + margin;

Scenario:
if the task_util = 200, {uclamp_min, uclamp_max} = {100, 1024}

without patch:
clamp_util = 200;

with patch:
clamp_util = 200 + (100 / 1024) * (1024 - 200) = 280;

On the other hand, adding a SYS interface to  allow users
to configure it according to their own needs.

Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
---
 include/linux/sched/sysctl.h |  1 +
 kernel/sched/core.c          | 19 +++++++++++++++++++
 kernel/sched/fair.c          | 15 ++++++++++++---
 kernel/sched/sched.h         | 10 +++++++++-
 kernel/sysctl.c              |  9 +++++++++
 5 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index db2c0f34aaaf..97d8c5ecd4e6 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -69,6 +69,7 @@ extern unsigned int sysctl_sched_dl_period_min;
 extern unsigned int sysctl_sched_uclamp_util_min;
 extern unsigned int sysctl_sched_uclamp_util_max;
 extern unsigned int sysctl_sched_uclamp_util_min_rt_default;
+extern unsigned int sysctl_sched_uclamp_min_to_boost;
 #endif
 
 #ifdef CONFIG_CFS_BANDWIDTH
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d9ff40f4661..8a49f9962cda 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1245,6 +1245,9 @@ unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
  */
 unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE;
 
+/* map util clamp_min to boost */
+unsigned int sysctl_sched_uclamp_min_to_boost;
+
 /* All clamps are required to be less or equal than these values */
 static struct uclamp_se uclamp_default[UCLAMP_CNT];
 
@@ -1448,6 +1451,22 @@ uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id)
 	return uc_req;
 }
 
+unsigned long uclamp_transform_boost(unsigned long util, unsigned long uclamp_min,
+				    unsigned long uclamp_max)
+{
+	unsigned long margin;
+
+	if (unlikely(uclamp_min > uclamp_max))
+		return util;
+
+	if (util >= uclamp_max)
+		return uclamp_max;
+
+	margin = DIV_ROUND_CLOSEST_ULL(uclamp_min * (uclamp_max - util),
+					SCHED_CAPACITY_SCALE);
+	return util + margin;
+}
+
 unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
 {
 	struct uclamp_se uc_eff;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 44c452072a1b..790dfbb6c897 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3934,9 +3934,18 @@ static inline unsigned long task_util_est(struct task_struct *p)
 #ifdef CONFIG_UCLAMP_TASK
 static inline unsigned long uclamp_task_util(struct task_struct *p)
 {
-	return clamp(task_util_est(p),
-		     uclamp_eff_value(p, UCLAMP_MIN),
-		     uclamp_eff_value(p, UCLAMP_MAX));
+	unsigned long min_util = uclamp_eff_value(p, UCLAMP_MIN);
+	unsigned long max_util = uclamp_eff_value(p, UCLAMP_MAX);
+	unsigned long clamp_util, util;
+
+	util = task_util_est(p);
+
+	if (sysctl_sched_uclamp_min_to_boost)
+		clamp_util = uclamp_transform_boost(util, min_util, max_util);
+	else
+		clamp_util = clamp(util, min_util, max_util);
+
+	return clamp_util;
 }
 #else
 static inline unsigned long uclamp_task_util(struct task_struct *p)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14a41a243f7b..73657be84678 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2796,6 +2796,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 
 #ifdef CONFIG_UCLAMP_TASK
 unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+unsigned long uclamp_transform_boost(unsigned long util, unsigned long uclamp_min,
+				     unsigned long uclamp_max);
 
 /**
  * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
@@ -2820,6 +2822,7 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 {
 	unsigned long min_util = 0;
 	unsigned long max_util = 0;
+	unsigned long clamp_util;
 
 	if (!static_branch_likely(&sched_uclamp_used))
 		return util;
@@ -2847,7 +2850,12 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 	if (unlikely(min_util >= max_util))
 		return min_util;
 
-	return clamp(util, min_util, max_util);
+	if (sysctl_sched_uclamp_min_to_boost)
+		clamp_util = uclamp_transform_boost(util, min_util, max_util);
+	else
+		clamp_util = clamp(util, min_util, max_util);
+
+	return clamp_util;
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 272f4a272f8c..b3a83356a969 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1827,6 +1827,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sysctl_sched_uclamp_handler,
 	},
+	{
+		.procname	= "sched_clamp_min2boost",
+		.data		= &sysctl_sched_uclamp_min_to_boost,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 #endif
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST
  2021-07-21  7:57 [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST Xuewen Yan
@ 2021-07-23 15:19 ` Dietmar Eggemann
  2021-07-24  2:03   ` Xuewen Yan
  0 siblings, 1 reply; 8+ messages in thread
From: Dietmar Eggemann @ 2021-07-23 15:19 UTC (permalink / raw)
  To: Xuewen Yan, mingo, peterz, juri.lelli, vincent.guittot
  Cc: rostedt, bsegall, mgorman, bristot, mcgrof, keescook, yzaikin,
	pjt, qais.yousef, qperret, linux-kernel, linux-fsdevel

On 21/07/2021 09:57, Xuewen Yan wrote:
> From: Xuewen Yan <xuewen.yan@unisoc.com>
> 
> The uclamp can clamp the util within uclamp_min and uclamp_max,
> it is benifit to some tasks with small util, but for those tasks
> with middle util, it is useless.
> 
> To speed up those tasks, convert UCLAMP_MIN to BOOST,
> the BOOST as schedtune does:

Maybe it's important to note here that schedtune is the `out-of-tree`
predecessor of uclamp used in some Android versions.

> boot = uclamp_min / SCHED_CAPACITY_SCALE;
> margin = boost * (uclamp_max - util)
> boost_util = util + margin;

This is essentially the functionality from schedtune_margin() in
Android, right?

So in your implementation, the margin (i.e. what is added to the task
util) not only depends on uclamp_min, but also on `uclamp_max`?

> Scenario:
> if the task_util = 200, {uclamp_min, uclamp_max} = {100, 1024}
> 
> without patch:
> clamp_util = 200;
> 
> with patch:
> clamp_util = 200 + (100 / 1024) * (1024 - 200) = 280;

The same could be achieved by using {uclamp_min, uclamp_max} = {280, 1024}?

Uclamp_min is meant to be the final `boost( = util + margin)`
information. You just have to set it appropriately to the task (via
per-task and/or per-cgroup interface).

Uclamp_min is for `boosting`, Uclamp max is for `capping` CPU frequency.

[...]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST
  2021-07-23 15:19 ` Dietmar Eggemann
@ 2021-07-24  2:03   ` Xuewen Yan
  2021-07-26 17:17     ` Qais Yousef
  0 siblings, 1 reply; 8+ messages in thread
From: Xuewen Yan @ 2021-07-24  2:03 UTC (permalink / raw)
  To: Dietmar Eggemann
  Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Steven Rostedt, Benjamin Segall, Mel Gorman,
	Daniel Bristot de Oliveira, mcgrof, Kees Cook, Iurii Zaikin,
	Paul Turner, Qais Yousef, Quentin Perret, linux-kernel,
	linux-fsdevel

On Fri, Jul 23, 2021 at 11:19 PM Dietmar Eggemann
<dietmar.eggemann@arm.com> wrote:
>
> On 21/07/2021 09:57, Xuewen Yan wrote:
> > From: Xuewen Yan <xuewen.yan@unisoc.com>
> >
> > The uclamp can clamp the util within uclamp_min and uclamp_max,
> > it is benifit to some tasks with small util, but for those tasks
> > with middle util, it is useless.
> >
> > To speed up those tasks, convert UCLAMP_MIN to BOOST,
> > the BOOST as schedtune does:
>
> Maybe it's important to note here that schedtune is the `out-of-tree`
> predecessor of uclamp used in some Android versions.

Yes, and the patch is indeed used on Android which kernel version is 5.4+.
Because the kernel used in Android do not have the schedtune, and the
uclamp can not
boost all the util, and this is the reason for the design of the patch.

>
> > boot = uclamp_min / SCHED_CAPACITY_SCALE;
> > margin = boost * (uclamp_max - util)
> > boost_util = util + margin;
>
> This is essentially the functionality from schedtune_margin() in
> Android, right?

YES!

>
> So in your implementation, the margin (i.e. what is added to the task
> util) not only depends on uclamp_min, but also on `uclamp_max`?

Yes, because we do not want to convert completely the uclamp to schedtune,
we also want user can limit some tasks, so the UCLAMP_MAX's meaning
has not been changed,
meanwhile, the UCLAMP_MAX also can affect the margin.

>
> > Scenario:
> > if the task_util = 200, {uclamp_min, uclamp_max} = {100, 1024}
> >
> > without patch:
> > clamp_util = 200;
> >
> > with patch:
> > clamp_util = 200 + (100 / 1024) * (1024 - 200) = 280;
>
> The same could be achieved by using {uclamp_min, uclamp_max} = {280, 1024}?

Yes, for per-task, that is no problem, but for per-cgroup, most times,
we can not always only put the special task into the cgroup.
For example, in Android , there is a cgroup named "top-app", often all
the threads of a app would be put into it.
But, not all threads of this app need to be boosted, if we set the
uclamp_min too big, the all the small task would be clamped to
uclamp_min,
the power consumption would be increased, howerever, if setting the
uclamp_min smaller, the performance may be increased.
Such as:
a task's util is 50,  {uclamp_min, uclamp_max} = {100, 1024}
the boost_util =  50 + (100 / 1024) * (1024 - 50) = 145;
but if we set {uclamp_min, uclamp_max} = {280, 1024}, without patch:
the clamp_util = 280.

>
> Uclamp_min is meant to be the final `boost( = util + margin)`
> information. You just have to set it appropriately to the task (via
> per-task and/or per-cgroup interface).

As said above, it is difficult to set the per-cgroup's uclamp_min for
all tasks in Android sometimes.

>
> Uclamp_min is for `boosting`, Uclamp max is for `capping` CPU frequency.

Yes!

>

Thanks!
xuewen

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST
  2021-07-24  2:03   ` Xuewen Yan
@ 2021-07-26 17:17     ` Qais Yousef
  2021-07-27 12:16       ` Xuewen Yan
  0 siblings, 1 reply; 8+ messages in thread
From: Qais Yousef @ 2021-07-26 17:17 UTC (permalink / raw)
  To: Xuewen Yan
  Cc: Dietmar Eggemann, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Steven Rostedt, Benjamin Segall, Mel Gorman,
	Daniel Bristot de Oliveira, mcgrof, Kees Cook, Iurii Zaikin,
	Paul Turner, Quentin Perret, linux-kernel, linux-fsdevel

Hi Xuewen

On 07/24/21 10:03, Xuewen Yan wrote:
> On Fri, Jul 23, 2021 at 11:19 PM Dietmar Eggemann
> <dietmar.eggemann@arm.com> wrote:
> >
> > On 21/07/2021 09:57, Xuewen Yan wrote:
> > > From: Xuewen Yan <xuewen.yan@unisoc.com>
> > >
> > > The uclamp can clamp the util within uclamp_min and uclamp_max,
> > > it is benifit to some tasks with small util, but for those tasks
> > > with middle util, it is useless.

It's not really useless, it works as it's designed ;-)

As Dietmar highlighted, you need to pick a higher boost value that gives you
the best perf/watt for your use case.

> > >
> > > To speed up those tasks, convert UCLAMP_MIN to BOOST,
> > > the BOOST as schedtune does:
> >
> > Maybe it's important to note here that schedtune is the `out-of-tree`
> > predecessor of uclamp used in some Android versions.
> 
> Yes, and the patch is indeed used on Android which kernel version is 5.4+.

I assume that this is a patch in your own Android 5.4 kernel, right? I'm not
aware of any such patch in Android Common Kernel. If it's there, do you mind
pointing me to the gerrit change that introduced it?

> Because the kernel used in Android do not have the schedtune, and the
> uclamp can not
> boost all the util, and this is the reason for the design of the patch.

Do you have a specific workload in mind here that is failing? It would help if
you can explain in detail the mode of failure you're seeing to help us
understand the problem better.

> 
> >
> > > boot = uclamp_min / SCHED_CAPACITY_SCALE;
> > > margin = boost * (uclamp_max - util)
> > > boost_util = util + margin;
> >
> > This is essentially the functionality from schedtune_margin() in
> > Android, right?
> 
> YES!
> 
> >
> > So in your implementation, the margin (i.e. what is added to the task
> > util) not only depends on uclamp_min, but also on `uclamp_max`?
> 
> Yes, because we do not want to convert completely the uclamp to schedtune,
> we also want user can limit some tasks, so the UCLAMP_MAX's meaning
> has not been changed,
> meanwhile, the UCLAMP_MAX also can affect the margin.
> 
> >
> > > Scenario:
> > > if the task_util = 200, {uclamp_min, uclamp_max} = {100, 1024}
> > >
> > > without patch:
> > > clamp_util = 200;
> > >
> > > with patch:
> > > clamp_util = 200 + (100 / 1024) * (1024 - 200) = 280;

If a task util was 200, how long does it take for it to reach 280? Why do you
need to have this immediate boost value applied and can't wait for this time to
lapse? I'm not sure, but ramping up by 80 points shouldn't take *that* long,
but don't quote me on this :-)

> >
> > The same could be achieved by using {uclamp_min, uclamp_max} = {280, 1024}?
> 
> Yes, for per-task, that is no problem, but for per-cgroup, most times,
> we can not always only put the special task into the cgroup.
> For example, in Android , there is a cgroup named "top-app", often all
> the threads of a app would be put into it.
> But, not all threads of this app need to be boosted, if we set the
> uclamp_min too big, the all the small task would be clamped to
> uclamp_min,
> the power consumption would be increased, howerever, if setting the
> uclamp_min smaller, the performance may be increased.
> Such as:
> a task's util is 50,  {uclamp_min, uclamp_max} = {100, 1024}
> the boost_util =  50 + (100 / 1024) * (1024 - 50) = 145;
> but if we set {uclamp_min, uclamp_max} = {280, 1024}, without patch:
> the clamp_util = 280.

I assume {uclamp_min, uclamp_max} = {145, 1024} is not good enough because you
want this 200 task to be boosted to 280. One can argue that not all tasks at
200 need to be boosted to 280 too. So the question is, like above, what type
of tasks that are failing here and how do you observe this failure? It seems
there's a class of performance critical tasks that need this fast boosting.
Can't you identify them and boost them individually?

There's nothing that prevents you to change the uclamp_min of the cgroup
dynamically by the way. Like for instance when an app launches you can choose
a high boost value then lower it once it started up. Or if you know the top-app
is a game and you want to guarantee a good minimum performance for it; you
can choose to increase the top-app uclamp_min value too in a special gaming
mode or something.

For best perf/watt, using the per-task API is the best way forward. But
I understand it'll take time for apps/android framework to learn how to use the
per-task API most effectively. But it is what we should be aiming for.

Cheers

--
Qais Yousef

> 
> >
> > Uclamp_min is meant to be the final `boost( = util + margin)`
> > information. You just have to set it appropriately to the task (via
> > per-task and/or per-cgroup interface).
> 
> As said above, it is difficult to set the per-cgroup's uclamp_min for
> all tasks in Android sometimes.
> 
> >
> > Uclamp_min is for `boosting`, Uclamp max is for `capping` CPU frequency.
> 
> Yes!
> 
> >
> 
> Thanks!
> xuewen

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST
  2021-07-26 17:17     ` Qais Yousef
@ 2021-07-27 12:16       ` Xuewen Yan
  2021-07-27 13:45         ` Qais Yousef
  0 siblings, 1 reply; 8+ messages in thread
From: Xuewen Yan @ 2021-07-27 12:16 UTC (permalink / raw)
  To: Qais Yousef
  Cc: Dietmar Eggemann, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Steven Rostedt, Benjamin Segall, Mel Gorman,
	Daniel Bristot de Oliveira, mcgrof, Kees Cook, Iurii Zaikin,
	Paul Turner, Quentin Perret, linux-kernel, linux-fsdevel

Hi Qais

On Tue, Jul 27, 2021 at 1:17 AM Qais Yousef <qais.yousef@arm.com> wrote:
>
> > > >
> > > > The uclamp can clamp the util within uclamp_min and uclamp_max,
> > > > it is benifit to some tasks with small util, but for those tasks
> > > > with middle util, it is useless.
>
> It's not really useless, it works as it's designed ;-)

Yes, my expression problem...

>
> As Dietmar highlighted, you need to pick a higher boost value that gives you
> the best perf/watt for your use case.
>
> I assume that this is a patch in your own Android 5.4 kernel, right? I'm not

Yes, the patch indeed is used in my own Android12 with kernel5.4.

> aware of any such patch in Android Common Kernel. If it's there, do you mind
> pointing me to the gerrit change that introduced it?

emmm, sorry I kind of understand what that means.  Your means is  what
I need to do is to send this patch to google?

>
> > Because the kernel used in Android do not have the schedtune, and the
> > uclamp can not
> > boost all the util, and this is the reason for the design of the patch.
>
> Do you have a specific workload in mind here that is failing? It would help if
> you can explain in detail the mode of failure you're seeing to help us
> understand the problem better.

The patch has has been working with me for a while, I can redo this
data, but this might take a while :)

> >
> > >
> > > > Scenario:
> > > > if the task_util = 200, {uclamp_min, uclamp_max} = {100, 1024}
> > > >
> > > > without patch:
> > > > clamp_util = 200;
> > > >
> > > > with patch:
> > > > clamp_util = 200 + (100 / 1024) * (1024 - 200) = 280;
>
> If a task util was 200, how long does it take for it to reach 280? Why do you
> need to have this immediate boost value applied and can't wait for this time to
> lapse? I'm not sure, but ramping up by 80 points shouldn't take *that* long,
> but don't quote me on this :-)

Here is just one example to illustrate that , with this patch, It also
can boost the util which in {UCLAMP_MIN, UCLAMP_MAX}...

>
> > >
> > > The same could be achieved by using {uclamp_min, uclamp_max} = {280, 1024}?
> >
> > Yes, for per-task, that is no problem, but for per-cgroup, most times,
> > we can not always only put the special task into the cgroup.
> > For example, in Android , there is a cgroup named "top-app", often all
> > the threads of a app would be put into it.
> > But, not all threads of this app need to be boosted, if we set the
> > uclamp_min too big, the all the small task would be clamped to
> > uclamp_min,
> > the power consumption would be increased, howerever, if setting the
> > uclamp_min smaller, the performance may be increased.
> > Such as:
> > a task's util is 50,  {uclamp_min, uclamp_max} = {100, 1024}
> > the boost_util =  50 + (100 / 1024) * (1024 - 50) = 145;
> > but if we set {uclamp_min, uclamp_max} = {280, 1024}, without patch:
> > the clamp_util = 280.
>
> I assume {uclamp_min, uclamp_max} = {145, 1024} is not good enough because you
> want this 200 task to be boosted to 280. One can argue that not all tasks at
> 200 need to be boosted to 280 too. So the question is, like above, what type
> of tasks that are failing here and how do you observe this failure? It seems
> there's a class of performance critical tasks that need this fast boosting.
> Can't you identify them and boost them individually?

Yes, the best way to do that is boosting them individually, but
usually, it may not be so easy...

>
> There's nothing that prevents you to change the uclamp_min of the cgroup
> dynamically by the way. Like for instance when an app launches you can choose
> a high boost value then lower it once it started up. Or if you know the top-app
> is a game and you want to guarantee a good minimum performance for it; you
> can choose to increase the top-app uclamp_min value too in a special gaming
> mode or something.
>
> For best perf/watt, using the per-task API is the best way forward. But
> I understand it'll take time for apps/android framework to learn how to use the
> per-task API most effectively. But it is what we should be aiming for.

Yes, and I have learned that there is an ADPF framework in Android12,
It can dynamically adjust the per-task's  uclamp_min/max to boost the
task.
Compared to the rough behavior of patch, ADPF may perform better , but
I need to test and compare them...

Thanks!
xuewen.yan

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST
  2021-07-27 12:16       ` Xuewen Yan
@ 2021-07-27 13:45         ` Qais Yousef
  2021-07-28  1:40           ` Xuewen Yan
  0 siblings, 1 reply; 8+ messages in thread
From: Qais Yousef @ 2021-07-27 13:45 UTC (permalink / raw)
  To: Xuewen Yan
  Cc: Dietmar Eggemann, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Steven Rostedt, Benjamin Segall, Mel Gorman,
	Daniel Bristot de Oliveira, mcgrof, Kees Cook, Iurii Zaikin,
	Paul Turner, Quentin Perret, linux-kernel, linux-fsdevel

Hi Xuewen

On 07/27/21 20:16, Xuewen Yan wrote:
> Hi Qais
> 
> On Tue, Jul 27, 2021 at 1:17 AM Qais Yousef <qais.yousef@arm.com> wrote:
> >
> > > > >
> > > > > The uclamp can clamp the util within uclamp_min and uclamp_max,
> > > > > it is benifit to some tasks with small util, but for those tasks
> > > > > with middle util, it is useless.
> >
> > It's not really useless, it works as it's designed ;-)
> 
> Yes, my expression problem...

No worries, I understood what you meant. But I had to highlight that this is
the intended design behavior :-)

> 
> >
> > As Dietmar highlighted, you need to pick a higher boost value that gives you
> > the best perf/watt for your use case.
> >
> > I assume that this is a patch in your own Android 5.4 kernel, right? I'm not
> 
> Yes, the patch indeed is used in my own Android12 with kernel5.4.
> 
> > aware of any such patch in Android Common Kernel. If it's there, do you mind
> > pointing me to the gerrit change that introduced it?
> 
> emmm, sorry I kind of understand what that means.  Your means is  what
> I need to do is to send this patch to google?

Oh no. I meant if you are *not* carrying this patch in your own, I'd appreciate
getting a link to when it was merged into Google' tree. But you already said
you carry this patch on your own kernel, so there's nothing to do :)

> 
> >
> > > Because the kernel used in Android do not have the schedtune, and the
> > > uclamp can not
> > > boost all the util, and this is the reason for the design of the patch.
> >
> > Do you have a specific workload in mind here that is failing? It would help if
> > you can explain in detail the mode of failure you're seeing to help us
> > understand the problem better.
> 
> The patch has has been working with me for a while, I can redo this
> data, but this might take a while :)

But there must have been a reason you introduced it in the first place, what
was that reason?

> 
> > >
> > > >
> > > > > Scenario:
> > > > > if the task_util = 200, {uclamp_min, uclamp_max} = {100, 1024}
> > > > >
> > > > > without patch:
> > > > > clamp_util = 200;
> > > > >
> > > > > with patch:
> > > > > clamp_util = 200 + (100 / 1024) * (1024 - 200) = 280;
> >
> > If a task util was 200, how long does it take for it to reach 280? Why do you
> > need to have this immediate boost value applied and can't wait for this time to
> > lapse? I'm not sure, but ramping up by 80 points shouldn't take *that* long,
> > but don't quote me on this :-)
> 
> Here is just one example to illustrate that , with this patch, It also
> can boost the util which in {UCLAMP_MIN, UCLAMP_MAX}...
> 
> >
> > > >
> > > > The same could be achieved by using {uclamp_min, uclamp_max} = {280, 1024}?
> > >
> > > Yes, for per-task, that is no problem, but for per-cgroup, most times,
> > > we can not always only put the special task into the cgroup.
> > > For example, in Android , there is a cgroup named "top-app", often all
> > > the threads of a app would be put into it.
> > > But, not all threads of this app need to be boosted, if we set the
> > > uclamp_min too big, the all the small task would be clamped to
> > > uclamp_min,
> > > the power consumption would be increased, howerever, if setting the
> > > uclamp_min smaller, the performance may be increased.
> > > Such as:
> > > a task's util is 50,  {uclamp_min, uclamp_max} = {100, 1024}
> > > the boost_util =  50 + (100 / 1024) * (1024 - 50) = 145;
> > > but if we set {uclamp_min, uclamp_max} = {280, 1024}, without patch:
> > > the clamp_util = 280.
> >
> > I assume {uclamp_min, uclamp_max} = {145, 1024} is not good enough because you
> > want this 200 task to be boosted to 280. One can argue that not all tasks at
> > 200 need to be boosted to 280 too. So the question is, like above, what type
> > of tasks that are failing here and how do you observe this failure? It seems
> > there's a class of performance critical tasks that need this fast boosting.
> > Can't you identify them and boost them individually?
> 
> Yes, the best way to do that is boosting them individually, but
> usually, it may not be so easy...

Yes I appreciate that, but cgroup is a coarse grain controller. Even with your
approach, you will still have to find the best compromise because some tasks
will get more boosting than they really need to and waste power even with your
approach.

For best outcome with uclamp; the cgroup should be used to specify the minimum
performance requirement of a class of tasks, then use the per-task API to fine
tune the settings for specific tasks.

I appreciate it'll take time to get there, but this is the best way forward.

If you have a specific use case that's failing, it will still be good to share
the details to think more if there's something we can do about it at the kernel
level.

Thanks

--
Qais Yousef

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST
  2021-07-27 13:45         ` Qais Yousef
@ 2021-07-28  1:40           ` Xuewen Yan
  2021-07-28 15:34             ` Qais Yousef
  0 siblings, 1 reply; 8+ messages in thread
From: Xuewen Yan @ 2021-07-28  1:40 UTC (permalink / raw)
  To: Qais Yousef
  Cc: Dietmar Eggemann, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Steven Rostedt, Benjamin Segall, Mel Gorman,
	Daniel Bristot de Oliveira, mcgrof, Kees Cook, Iurii Zaikin,
	Paul Turner, Quentin Perret, linux-kernel, linux-fsdevel

Hi Qais

Thanks for your patient reply, and I have got that I need to do more
work in uclamp to balance the performance and power, especially in
per-task API.
And If there is any progress in the future, I hope to keep
communicating with you.

Thank you very much!

BR
xuewen

On Tue, Jul 27, 2021 at 9:45 PM Qais Yousef <qais.yousef@arm.com> wrote:
>
> Hi Xuewen
>
> On 07/27/21 20:16, Xuewen Yan wrote:
> > Hi Qais
> >
> > On Tue, Jul 27, 2021 at 1:17 AM Qais Yousef <qais.yousef@arm.com> wrote:
> > >
> > > > > >
> > > > > > The uclamp can clamp the util within uclamp_min and uclamp_max,
> > > > > > it is benifit to some tasks with small util, but for those tasks
> > > > > > with middle util, it is useless.
> > >
> > > It's not really useless, it works as it's designed ;-)
> >
> > Yes, my expression problem...
>
> No worries, I understood what you meant. But I had to highlight that this is
> the intended design behavior :-)
>
> >
> > >
> > > As Dietmar highlighted, you need to pick a higher boost value that gives you
> > > the best perf/watt for your use case.
> > >
> > > I assume that this is a patch in your own Android 5.4 kernel, right? I'm not
> >
> > Yes, the patch indeed is used in my own Android12 with kernel5.4.
> >
> > > aware of any such patch in Android Common Kernel. If it's there, do you mind
> > > pointing me to the gerrit change that introduced it?
> >
> > emmm, sorry I kind of understand what that means.  Your means is  what
> > I need to do is to send this patch to google?
>
> Oh no. I meant if you are *not* carrying this patch in your own, I'd appreciate
> getting a link to when it was merged into Google' tree. But you already said
> you carry this patch on your own kernel, so there's nothing to do :)
>
> >
> > >
> > > > Because the kernel used in Android do not have the schedtune, and the
> > > > uclamp can not
> > > > boost all the util, and this is the reason for the design of the patch.
> > >
> > > Do you have a specific workload in mind here that is failing? It would help if
> > > you can explain in detail the mode of failure you're seeing to help us
> > > understand the problem better.
> >
> > The patch has has been working with me for a while, I can redo this
> > data, but this might take a while :)
>
> But there must have been a reason you introduced it in the first place, what
> was that reason?
>
> >
> > > >
> > > > >
> > > > > > Scenario:
> > > > > > if the task_util = 200, {uclamp_min, uclamp_max} = {100, 1024}
> > > > > >
> > > > > > without patch:
> > > > > > clamp_util = 200;
> > > > > >
> > > > > > with patch:
> > > > > > clamp_util = 200 + (100 / 1024) * (1024 - 200) = 280;
> > >
> > > If a task util was 200, how long does it take for it to reach 280? Why do you
> > > need to have this immediate boost value applied and can't wait for this time to
> > > lapse? I'm not sure, but ramping up by 80 points shouldn't take *that* long,
> > > but don't quote me on this :-)
> >
> > Here is just one example to illustrate that , with this patch, It also
> > can boost the util which in {UCLAMP_MIN, UCLAMP_MAX}...
> >
> > >
> > > > >
> > > > > The same could be achieved by using {uclamp_min, uclamp_max} = {280, 1024}?
> > > >
> > > > Yes, for per-task, that is no problem, but for per-cgroup, most times,
> > > > we can not always only put the special task into the cgroup.
> > > > For example, in Android , there is a cgroup named "top-app", often all
> > > > the threads of a app would be put into it.
> > > > But, not all threads of this app need to be boosted, if we set the
> > > > uclamp_min too big, the all the small task would be clamped to
> > > > uclamp_min,
> > > > the power consumption would be increased, howerever, if setting the
> > > > uclamp_min smaller, the performance may be increased.
> > > > Such as:
> > > > a task's util is 50,  {uclamp_min, uclamp_max} = {100, 1024}
> > > > the boost_util =  50 + (100 / 1024) * (1024 - 50) = 145;
> > > > but if we set {uclamp_min, uclamp_max} = {280, 1024}, without patch:
> > > > the clamp_util = 280.
> > >
> > > I assume {uclamp_min, uclamp_max} = {145, 1024} is not good enough because you
> > > want this 200 task to be boosted to 280. One can argue that not all tasks at
> > > 200 need to be boosted to 280 too. So the question is, like above, what type
> > > of tasks that are failing here and how do you observe this failure? It seems
> > > there's a class of performance critical tasks that need this fast boosting.
> > > Can't you identify them and boost them individually?
> >
> > Yes, the best way to do that is boosting them individually, but
> > usually, it may not be so easy...
>
> Yes I appreciate that, but cgroup is a coarse grain controller. Even with your
> approach, you will still have to find the best compromise because some tasks
> will get more boosting than they really need to and waste power even with your
> approach.
>
> For best outcome with uclamp; the cgroup should be used to specify the minimum
> performance requirement of a class of tasks, then use the per-task API to fine
> tune the settings for specific tasks.
>
> I appreciate it'll take time to get there, but this is the best way forward.
>
> If you have a specific use case that's failing, it will still be good to share
> the details to think more if there's something we can do about it at the kernel
> level.
>
> Thanks
>
> --
> Qais Yousef

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST
  2021-07-28  1:40           ` Xuewen Yan
@ 2021-07-28 15:34             ` Qais Yousef
  0 siblings, 0 replies; 8+ messages in thread
From: Qais Yousef @ 2021-07-28 15:34 UTC (permalink / raw)
  To: Xuewen Yan
  Cc: Dietmar Eggemann, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Steven Rostedt, Benjamin Segall, Mel Gorman,
	Daniel Bristot de Oliveira, mcgrof, Kees Cook, Iurii Zaikin,
	Paul Turner, Quentin Perret, linux-kernel, linux-fsdevel

On 07/28/21 09:40, Xuewen Yan wrote:
> Hi Qais
> 
> Thanks for your patient reply, and I have got that I need to do more
> work in uclamp to balance the performance and power, especially in
> per-task API.
> And If there is any progress in the future, I hope to keep
> communicating with you.

Sounds good :)

Thanks!

--
Qais Yousef

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-07-28 15:34 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-21  7:57 [PATCH] sched/uclamp: Introduce a method to transform UCLAMP_MIN into BOOST Xuewen Yan
2021-07-23 15:19 ` Dietmar Eggemann
2021-07-24  2:03   ` Xuewen Yan
2021-07-26 17:17     ` Qais Yousef
2021-07-27 12:16       ` Xuewen Yan
2021-07-27 13:45         ` Qais Yousef
2021-07-28  1:40           ` Xuewen Yan
2021-07-28 15:34             ` Qais Yousef

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.