LKML Archive on lore.kernel.org
 help / Atom feed
From: Patrick Bellasi <patrick.bellasi@arm.com>
To: linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org
Cc: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>, Tejun Heo <tj@kernel.org>,
	"Rafael J . Wysocki" <rafael.j.wysocki@intel.com>,
	Viresh Kumar <viresh.kumar@linaro.org>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Paul Turner <pjt@google.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Morten Rasmussen <morten.rasmussen@arm.com>,
	Juri Lelli <juri.lelli@redhat.com>, Todd Kjos <tkjos@google.com>,
	Joel Fernandes <joelaf@google.com>,
	Steve Muckle <smuckle@google.com>,
	Suren Baghdasaryan <surenb@google.com>
Subject: [PATCH v3 12/14] sched/core: uclamp: add system default clamps
Date: Mon,  6 Aug 2018 17:39:44 +0100
Message-ID: <20180806163946.28380-13-patrick.bellasi@arm.com> (raw)
In-Reply-To: <20180806163946.28380-1-patrick.bellasi@arm.com>

Clamp values cannot be tuned at the root cgroup level. Moreover, because
of the delegation model requirements and how the parent clamps
propagation works, if we want to enable subgroups to set a non-null
util.min, we need to be able to configure the root group util.min to
allow the maximum utilization (SCHED_CAPACITY_SCALE = 1024).

Unfortunately, this setup also means that all tasks running in the
root group will always get a maximum util.min clamp, unless they have a
lower task-specific clamp, which is definitely not a desirable default
configuration.

Let's fix this by explicitly adding a system default configuration
(sysctl_sched_uclamp_util_{min,max}) which works as a restrictive clamp
for all tasks running on the root group.

This interface is available independently from cgroups, thus providing a
complete solution for system wide utilization clamping configuration.

Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Paul Turner <pjt@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@google.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Steve Muckle <smuckle@google.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org
---
 include/linux/sched/sysctl.h |  11 ++++
 kernel/sched/core.c          | 102 +++++++++++++++++++++++++++++++++--
 kernel/sysctl.c              |  16 ++++++
 3 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 913488d828cb..c46346d3cc69 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -55,6 +55,11 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
+#ifdef CONFIG_UCLAMP_TASK
+extern unsigned int sysctl_sched_uclamp_util_min;
+extern unsigned int sysctl_sched_uclamp_util_max;
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 #endif
@@ -74,6 +79,12 @@ extern int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
 
+#ifdef CONFIG_UCLAMP_TASK
+extern int sched_uclamp_handler(struct ctl_table *table, int write,
+				void __user *buffer, size_t *lenp,
+				loff_t *ppos);
+#endif
+
 extern int sysctl_numa_balancing(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f54fd9bda9a7..48458fea2d5e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -728,6 +728,20 @@ static void set_load_weight(struct task_struct *p, bool update_load)
  */
 static DEFINE_MUTEX(uclamp_mutex);
 
+/*
+ * Minimum utilization for tasks in the root cgroup
+ * default: 0
+ */
+unsigned int sysctl_sched_uclamp_util_min;
+
+/*
+ * Maximum utilization for tasks in the root cgroup
+ * default: 1024
+ */
+unsigned int sysctl_sched_uclamp_util_max = 1024;
+
+static struct uclamp_se uclamp_default[UCLAMP_CNT];
+
 /**
  * uclamp_map: reference counts a utilization "clamp value"
  * @value:    the utilization "clamp value" required
@@ -957,12 +971,25 @@ static inline int uclamp_task_group_id(struct task_struct *p, int clamp_id)
 	group_id = uc_se->group_id;
 
 #ifdef CONFIG_UCLAMP_TASK_GROUP
+	/*
+	 * Tasks in the root group, which do not have a task specific clamp
+	 * value, get the system default clamp value.
+	 */
+	if (group_id == UCLAMP_NOT_VALID &&
+	    task_group(p) == &root_task_group) {
+		return uclamp_default[clamp_id].group_id;
+	}
+
 	/* Use TG's clamp value to limit task specific values */
 	uc_se = &task_group(p)->uclamp[clamp_id];
 	if (group_id == UCLAMP_NOT_VALID ||
 	    clamp_value > uc_se->effective.value) {
 		group_id = uc_se->effective.group_id;
 	}
+#else
+	/* By default, all tasks get the system default clamp value */
+	if (group_id == UCLAMP_NOT_VALID)
+		return uclamp_default[clamp_id].group_id;
 #endif
 
 	return group_id;
@@ -1269,6 +1296,75 @@ static inline void uclamp_group_get(struct task_struct *p,
 	uclamp_group_put(clamp_id, prev_group_id);
 }
 
+int sched_uclamp_handler(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp,
+			 loff_t *ppos)
+{
+	int group_id[UCLAMP_CNT] = { UCLAMP_NOT_VALID };
+	struct uclamp_se *uc_se;
+	int old_min, old_max;
+	int result;
+
+	mutex_lock(&uclamp_mutex);
+
+	old_min = sysctl_sched_uclamp_util_min;
+	old_max = sysctl_sched_uclamp_util_max;
+
+	result = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (result)
+		goto undo;
+	if (!write)
+		goto done;
+
+	if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max)
+		goto undo;
+	if (sysctl_sched_uclamp_util_max > 1024)
+		goto undo;
+
+	/* Find a valid group_id for each required clamp value */
+	if (old_min != sysctl_sched_uclamp_util_min) {
+		result = uclamp_group_find(UCLAMP_MIN, sysctl_sched_uclamp_util_min);
+		if (result == -ENOSPC) {
+			pr_err("Cannot allocate more than %d UTIL_MIN clamp groups\n",
+			       CONFIG_UCLAMP_GROUPS_COUNT);
+			goto undo;
+		}
+		group_id[UCLAMP_MIN] = result;
+	}
+	if (old_max != sysctl_sched_uclamp_util_max) {
+		result = uclamp_group_find(UCLAMP_MAX, sysctl_sched_uclamp_util_max);
+		if (result == -ENOSPC) {
+			pr_err("Cannot allocate more than %d UTIL_MAX clamp groups\n",
+			       CONFIG_UCLAMP_GROUPS_COUNT);
+			goto undo;
+		}
+		group_id[UCLAMP_MAX] = result;
+	}
+
+	/* Update each required clamp group */
+	if (old_min != sysctl_sched_uclamp_util_min) {
+		uc_se = &uclamp_default[UCLAMP_MIN];
+		uclamp_group_get(NULL, UCLAMP_MIN, group_id[UCLAMP_MIN],
+				 uc_se, sysctl_sched_uclamp_util_min);
+	}
+	if (old_max != sysctl_sched_uclamp_util_max) {
+		uc_se = &uclamp_default[UCLAMP_MAX];
+		uclamp_group_get(NULL, UCLAMP_MAX, group_id[UCLAMP_MAX],
+				 uc_se, sysctl_sched_uclamp_util_max);
+	}
+
+	if (result) {
+undo:
+		sysctl_sched_uclamp_util_min = old_min;
+		sysctl_sched_uclamp_util_max = old_max;
+	}
+
+done:
+	mutex_unlock(&uclamp_mutex);
+
+	return result;
+}
+
 #ifdef CONFIG_UCLAMP_TASK_GROUP
 /**
  * init_uclamp_sched_group: initialize data structures required for TG's
@@ -1291,11 +1387,11 @@ static inline void init_uclamp_sched_group(void)
 		/* Map root TG's clamp value */
 		uclamp_group_init(clamp_id, group_id, uclamp_none(clamp_id));
 
-		/* Init root TG's clamp group */
+		/* Init root TG's clamp group: max values always enabled */
 		uc_se = &root_task_group.uclamp[clamp_id];
-		uc_se->value = uclamp_none(clamp_id);
+		uc_se->value = uclamp_none(UCLAMP_MAX);
 		uc_se->group_id = group_id;
-		uc_se->effective.value = uclamp_none(clamp_id);
+		uc_se->effective.value = uclamp_none(UCLAMP_MAX);
 		uc_se->effective.group_id = group_id;
 
 		/* Attach root TG's clamp group */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f22f76b7a138..051d6da237e0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -442,6 +442,22 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rr_handler,
 	},
+#ifdef CONFIG_UCLAMP_TASK
+	{
+		.procname	= "sched_uclamp_util_min",
+		.data		= &sysctl_sched_uclamp_util_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_uclamp_handler,
+	},
+	{
+		.procname	= "sched_uclamp_util_max",
+		.data		= &sysctl_sched_uclamp_util_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_uclamp_handler,
+	},
+#endif
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",
-- 
2.18.0


  parent reply index

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-06 16:39 [PATCH v3 00/14] Add utilization clamping support Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 01/14] sched/core: uclamp: extend sched_setattr to support utilization clamping Patrick Bellasi
2018-08-06 16:50   ` Randy Dunlap
2018-08-09  8:39     ` Patrick Bellasi
2018-08-09 15:20       ` Randy Dunlap
2018-08-07  9:59   ` Juri Lelli
2018-08-13 12:14     ` Patrick Bellasi
2018-08-13 12:27       ` Juri Lelli
2018-08-07 12:35   ` Juri Lelli
2018-08-09  9:14     ` Patrick Bellasi
2018-08-09  9:50       ` Juri Lelli
2018-08-09 15:23         ` Patrick Bellasi
2018-08-10  7:50           ` Juri Lelli
2018-08-17 10:34           ` Quentin Perret
2018-08-17 10:57             ` Patrick Bellasi
2018-08-17 11:14               ` Quentin Perret
2018-08-06 16:39 ` [PATCH v3 02/14] sched/core: uclamp: map TASK's clamp values into CPU's clamp groups Patrick Bellasi
2018-08-14 11:25   ` Pavan Kondeti
2018-08-14 15:21     ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 03/14] sched/core: uclamp: add CPU's clamp groups accounting Patrick Bellasi
2018-08-14 15:44   ` Dietmar Eggemann
2018-08-14 16:49     ` Patrick Bellasi
2018-08-15  9:37       ` Dietmar Eggemann
2018-08-15 10:54         ` Patrick Bellasi
2018-08-15 10:59           ` Dietmar Eggemann
2018-08-16 13:32             ` Patrick Bellasi
2018-08-16 13:37               ` Quentin Perret
2018-08-16 13:45                 ` Dietmar Eggemann
2018-08-16 14:21                   ` Quentin Perret
2018-08-16 15:00                     ` Dietmar Eggemann
2018-08-17 11:04   ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 04/14] sched/core: uclamp: update CPU's refcount on clamp changes Patrick Bellasi
2018-08-15 15:02   ` Dietmar Eggemann
2018-08-16 13:22     ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 05/14] sched/cpufreq: uclamp: add utilization clamping for FAIR tasks Patrick Bellasi
2018-08-08 13:18   ` Vincent Guittot
2018-08-09 15:30     ` Patrick Bellasi
2018-08-15 15:30   ` Dietmar Eggemann
2018-08-16 13:53     ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 06/14] sched/cpufreq: uclamp: add utilization clamping for RT tasks Patrick Bellasi
2018-08-07 13:26   ` Juri Lelli
2018-08-09 15:34     ` Patrick Bellasi
2018-08-09 16:03       ` Vincent Guittot
2018-08-13 10:12         ` Patrick Bellasi
2018-08-13 10:50           ` Juri Lelli
2018-08-13 12:07           ` Vincent Guittot
2018-08-13 12:09             ` Vincent Guittot
2018-08-13 12:49             ` Patrick Bellasi
2018-08-13 14:06               ` Vincent Guittot
2018-08-13 15:01                 ` Patrick Bellasi
2018-08-16 10:34                   ` Dietmar Eggemann
2018-08-16 13:40                     ` Patrick Bellasi
2018-08-07 13:54   ` Quentin Perret
2018-08-09 15:41     ` Patrick Bellasi
2018-08-09 15:55       ` Quentin Perret
2018-08-13 10:17         ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 07/14] sched/core: uclamp: enforce last task UCLAMP_MAX Patrick Bellasi
2018-08-16 15:43   ` Dietmar Eggemann
2018-08-16 16:47     ` Patrick Bellasi
2018-08-16 17:10       ` Dietmar Eggemann
2018-08-16 17:27         ` Patrick Bellasi
2018-08-16 17:20   ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 08/14] sched/core: uclamp: extend cpu's cgroup controller Patrick Bellasi
2018-08-17 12:21   ` Dietmar Eggemann
2018-08-17 14:24     ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 09/14] sched/core: uclamp: propagate parent clamps Patrick Bellasi
2018-08-16  9:09   ` Pavan Kondeti
2018-08-16 14:07     ` Patrick Bellasi
2018-08-17 13:43   ` Dietmar Eggemann
2018-08-17 14:45     ` Patrick Bellasi
2018-08-17 15:50       ` Dietmar Eggemann
2018-08-20 10:01         ` Dietmar Eggemann
2018-08-20 12:28           ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 10/14] sched/core: uclamp: map TG's clamp values into CPU's clamp groups Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 11/14] sched/core: uclamp: use TG's clamps to restrict Task's clamps Patrick Bellasi
2018-08-06 16:39 ` Patrick Bellasi [this message]
2018-08-16  9:13   ` [PATCH v3 12/14] sched/core: uclamp: add system default clamps Pavan Kondeti
2018-08-16 14:37     ` Patrick Bellasi
2018-08-20 10:18   ` Dietmar Eggemann
2018-08-20 12:27     ` Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 13/14] sched/core: uclamp: update CPU's refcount on TG's clamp changes Patrick Bellasi
2018-08-06 16:39 ` [PATCH v3 14/14] sched/core: uclamp: use percentage clamp values Patrick Bellasi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180806163946.28380-13-patrick.bellasi@arm.com \
    --to=patrick.bellasi@arm.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=joelaf@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=morten.rasmussen@arm.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rafael.j.wysocki@intel.com \
    --cc=smuckle@google.com \
    --cc=surenb@google.com \
    --cc=tj@kernel.org \
    --cc=tkjos@google.com \
    --cc=vincent.guittot@linaro.org \
    --cc=viresh.kumar@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox