linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org, vincent.guittot@linaro.org
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org,
	juri.lelli@redhat.com, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	bristot@redhat.com, corbet@lwn.net, qyousef@layalina.io,
	chris.hyser@oracle.com, patrick.bellasi@matbug.net,
	pjt@google.com, pavel@ucw.cz, qperret@google.com,
	tim.c.chen@linux.intel.com, joshdon@google.com, timj@gnu.org,
	kprateek.nayak@amd.com, yu.c.chen@intel.com,
	youssefesmat@chromium.org, joel@joelfernandes.org, efault@gmx.de,
	Parth Shah <parth@linux.ibm.com>
Subject: [PATCH 01/17] sched: Introduce latency-nice as a per-task attribute
Date: Tue, 28 Mar 2023 11:26:23 +0200	[thread overview]
Message-ID: <20230328110353.631740156@infradead.org> (raw)
In-Reply-To: 20230328092622.062917921@infradead.org

From: Parth Shah <parth@linux.ibm.com>

Latency-nice indicates the latency requirements of a task with respect
to the other tasks in the system. The value of the attribute can be within
the range of [-20, 19] both inclusive to be in-line with the values just
like task nice values.

Just like task nice, -20 is the 'highest' priority and conveys this
task should get minimal latency, conversely 19 is the lowest priority
and conveys this task will get the least consideration and will thus
receive maximal latency.

[peterz: rebase, squash]
Signed-off-by: Parth Shah <parth@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/sched.h            |    1 +
 include/uapi/linux/sched.h       |    4 +++-
 include/uapi/linux/sched/types.h |   19 +++++++++++++++++++
 init/init_task.c                 |    3 ++-
 kernel/sched/core.c              |   27 ++++++++++++++++++++++++++-
 kernel/sched/debug.c             |    1 +
 tools/include/uapi/linux/sched.h |    4 +++-
 7 files changed, 55 insertions(+), 4 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -784,6 +784,7 @@ struct task_struct {
 	int				static_prio;
 	int				normal_prio;
 	unsigned int			rt_priority;
+	int				latency_prio;
 
 	struct sched_entity		se;
 	struct sched_rt_entity		rt;
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -132,6 +132,7 @@ struct clone_args {
 #define SCHED_FLAG_KEEP_PARAMS		0x10
 #define SCHED_FLAG_UTIL_CLAMP_MIN	0x20
 #define SCHED_FLAG_UTIL_CLAMP_MAX	0x40
+#define SCHED_FLAG_LATENCY_NICE		0x80
 
 #define SCHED_FLAG_KEEP_ALL	(SCHED_FLAG_KEEP_POLICY | \
 				 SCHED_FLAG_KEEP_PARAMS)
@@ -143,6 +144,7 @@ struct clone_args {
 			 SCHED_FLAG_RECLAIM		| \
 			 SCHED_FLAG_DL_OVERRUN		| \
 			 SCHED_FLAG_KEEP_ALL		| \
-			 SCHED_FLAG_UTIL_CLAMP)
+			 SCHED_FLAG_UTIL_CLAMP		| \
+			 SCHED_FLAG_LATENCY_NICE)
 
 #endif /* _UAPI_LINUX_SCHED_H */
--- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h
@@ -10,6 +10,7 @@ struct sched_param {
 
 #define SCHED_ATTR_SIZE_VER0	48	/* sizeof first published struct */
 #define SCHED_ATTR_SIZE_VER1	56	/* add: util_{min,max} */
+#define SCHED_ATTR_SIZE_VER2	60	/* add: latency_nice */
 
 /*
  * Extended scheduling parameters data structure.
@@ -98,6 +99,22 @@ struct sched_param {
  * scheduled on a CPU with no more capacity than the specified value.
  *
  * A task utilization boundary can be reset by setting the attribute to -1.
+ *
+ * Latency Tolerance Attributes
+ * ===========================
+ *
+ * A subset of sched_attr attributes allows to specify the relative latency
+ * requirements of a task with respect to the other tasks running/queued in the
+ * system.
+ *
+ * @ sched_latency_nice	task's latency_nice value
+ *
+ * The latency_nice of a task can have any value in a range of
+ * [MIN_LATENCY_NICE..MAX_LATENCY_NICE].
+ *
+ * A task with latency_nice with the value of LATENCY_NICE_MIN can be
+ * taken for a task requiring a lower latency as opposed to the task with
+ * higher latency_nice.
  */
 struct sched_attr {
 	__u32 size;
@@ -120,6 +137,8 @@ struct sched_attr {
 	__u32 sched_util_min;
 	__u32 sched_util_max;
 
+	/* latency requirement hints */
+	__s32 sched_latency_nice;
 };
 
 #endif /* _UAPI_LINUX_SCHED_TYPES_H */
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -78,6 +78,7 @@ struct task_struct init_task
 	.prio		= MAX_PRIO - 20,
 	.static_prio	= MAX_PRIO - 20,
 	.normal_prio	= MAX_PRIO - 20,
+	.latency_prio	= DEFAULT_PRIO,
 	.policy		= SCHED_NORMAL,
 	.cpus_ptr	= &init_task.cpus_mask,
 	.user_cpus_ptr	= NULL,
@@ -89,7 +90,7 @@ struct task_struct init_task
 		.fn = do_no_restart_syscall,
 	},
 	.se		= {
-		.group_node 	= LIST_HEAD_INIT(init_task.se.group_node),
+		.group_node	= LIST_HEAD_INIT(init_task.se.group_node),
 	},
 	.rt		= {
 		.run_list	= LIST_HEAD_INIT(init_task.rt.run_list),
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4684,6 +4684,8 @@ int sched_fork(unsigned long clone_flags
 		p->prio = p->normal_prio = p->static_prio;
 		set_load_weight(p, false);
 
+		p->latency_prio = NICE_TO_PRIO(0);
+
 		/*
 		 * We don't need the reset flag anymore after the fork. It has
 		 * fulfilled its duty:
@@ -7428,7 +7430,7 @@ static struct task_struct *find_process_
 #define SETPARAM_POLICY	-1
 
 static void __setscheduler_params(struct task_struct *p,
-		const struct sched_attr *attr)
+				  const struct sched_attr *attr)
 {
 	int policy = attr->sched_policy;
 
@@ -7452,6 +7454,13 @@ static void __setscheduler_params(struct
 	set_load_weight(p, true);
 }
 
+static void __setscheduler_latency(struct task_struct *p,
+				   const struct sched_attr *attr)
+{
+	if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE)
+		p->latency_prio = NICE_TO_PRIO(attr->sched_latency_nice);
+}
+
 /*
  * Check the target process has a UID that matches the current process's:
  */
@@ -7592,6 +7601,13 @@ static int __sched_setscheduler(struct t
 			return retval;
 	}
 
+	if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE) {
+		if (attr->sched_latency_nice > MAX_NICE)
+			return -EINVAL;
+		if (attr->sched_latency_nice < MIN_NICE)
+			return -EINVAL;
+	}
+
 	if (pi)
 		cpuset_read_lock();
 
@@ -7626,6 +7642,9 @@ static int __sched_setscheduler(struct t
 			goto change;
 		if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
 			goto change;
+		if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE &&
+		    attr->sched_latency_nice != PRIO_TO_NICE(p->latency_prio))
+			goto change;
 
 		p->sched_reset_on_fork = reset_on_fork;
 		retval = 0;
@@ -7714,6 +7733,7 @@ static int __sched_setscheduler(struct t
 		__setscheduler_params(p, attr);
 		__setscheduler_prio(p, newprio);
 	}
+	__setscheduler_latency(p, attr);
 	__setscheduler_uclamp(p, attr);
 
 	if (queued) {
@@ -7924,6 +7944,9 @@ static int sched_copy_attr(struct sched_
 	    size < SCHED_ATTR_SIZE_VER1)
 		return -EINVAL;
 
+	if ((attr->sched_flags & SCHED_FLAG_LATENCY_NICE) &&
+	    size < SCHED_ATTR_SIZE_VER2)
+		return -EINVAL;
 	/*
 	 * XXX: Do we want to be lenient like existing syscalls; or do we want
 	 * to be strict and return an error on out-of-bounds values?
@@ -8161,6 +8184,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pi
 	get_params(p, &kattr);
 	kattr.sched_flags &= SCHED_FLAG_ALL;
 
+	kattr.sched_latency_nice = PRIO_TO_NICE(p->latency_prio);
+
 #ifdef CONFIG_UCLAMP_TASK
 	/*
 	 * This could race with another potential updater, but this is fine
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1043,6 +1043,7 @@ void proc_sched_show_task(struct task_st
 #endif
 	P(policy);
 	P(prio);
+	P(latency_prio);
 	if (task_has_dl_policy(p)) {
 		P(dl.runtime);
 		P(dl.deadline);
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h
@@ -132,6 +132,7 @@ struct clone_args {
 #define SCHED_FLAG_KEEP_PARAMS		0x10
 #define SCHED_FLAG_UTIL_CLAMP_MIN	0x20
 #define SCHED_FLAG_UTIL_CLAMP_MAX	0x40
+#define SCHED_FLAG_LATENCY_NICE		0x80
 
 #define SCHED_FLAG_KEEP_ALL	(SCHED_FLAG_KEEP_POLICY | \
 				 SCHED_FLAG_KEEP_PARAMS)
@@ -143,6 +144,7 @@ struct clone_args {
 			 SCHED_FLAG_RECLAIM		| \
 			 SCHED_FLAG_DL_OVERRUN		| \
 			 SCHED_FLAG_KEEP_ALL		| \
-			 SCHED_FLAG_UTIL_CLAMP)
+			 SCHED_FLAG_UTIL_CLAMP		| \
+			 SCHED_FLAG_LATENCY_NICE)
 
 #endif /* _UAPI_LINUX_SCHED_H */



  reply	other threads:[~2023-03-28 11:07 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-28  9:26 [PATCH 00/17] sched: EEVDF using latency-nice Peter Zijlstra
2023-03-28  9:26 ` Peter Zijlstra [this message]
2023-03-28  9:26 ` [PATCH 02/17] sched/fair: Add latency_offset Peter Zijlstra
2023-03-28  9:26 ` [PATCH 03/17] sched/fair: Add sched group latency support Peter Zijlstra
2023-03-28  9:26 ` [PATCH 04/17] sched/fair: Add avg_vruntime Peter Zijlstra
2023-03-28 23:57   ` Josh Don
2023-03-29  7:50     ` Peter Zijlstra
2023-04-05 19:13       ` Peter Zijlstra
2023-03-28  9:26 ` [PATCH 05/17] sched/fair: Remove START_DEBIT Peter Zijlstra
2023-03-28  9:26 ` [PATCH 06/17] sched/fair: Add lag based placement Peter Zijlstra
2023-04-03  9:18   ` Chen Yu
2023-04-05  9:47     ` Peter Zijlstra
2023-04-06  3:03       ` Chen Yu
2023-04-13 15:42       ` Chen Yu
2023-04-13 15:55         ` Chen Yu
2023-03-28  9:26 ` [PATCH 07/17] rbtree: Add rb_add_augmented_cached() helper Peter Zijlstra
2023-03-28  9:26 ` [PATCH 08/17] sched/fair: Implement an EEVDF like policy Peter Zijlstra
2023-03-29  1:26   ` Josh Don
2023-03-29  8:02     ` Peter Zijlstra
2023-03-29  8:06     ` Peter Zijlstra
2023-03-29  8:22       ` Peter Zijlstra
2023-03-29 18:48         ` Josh Don
2023-03-29  8:12     ` Peter Zijlstra
2023-03-29 18:54       ` Josh Don
2023-03-29  8:18     ` Peter Zijlstra
2023-03-29 14:35   ` Vincent Guittot
2023-03-30  8:01     ` Peter Zijlstra
2023-03-30 17:05       ` Vincent Guittot
2023-04-04 12:00         ` Peter Zijlstra
2023-03-28  9:26 ` [PATCH 09/17] sched: Commit to lag based placement Peter Zijlstra
2023-03-28  9:26 ` [PATCH 10/17] sched/smp: Use lag to simplify cross-runqueue placement Peter Zijlstra
2023-03-28  9:26 ` [PATCH 11/17] sched: Commit to EEVDF Peter Zijlstra
2023-03-28  9:26 ` [PATCH 12/17] sched/debug: Rename min_granularity to base_slice Peter Zijlstra
2023-03-28  9:26 ` [PATCH 13/17] sched: Merge latency_offset into slice Peter Zijlstra
2023-03-28  9:26 ` [PATCH 14/17] sched/eevdf: Better handle mixed slice length Peter Zijlstra
2023-03-31 15:26   ` Vincent Guittot
2023-04-04  9:29     ` Peter Zijlstra
2023-04-04 13:50       ` Joel Fernandes
2023-04-05  5:41         ` Mike Galbraith
2023-04-05  8:35         ` Peter Zijlstra
2023-04-05 20:05           ` Joel Fernandes
2023-04-14 11:18             ` Phil Auld
2023-04-16  5:10               ` Joel Fernandes
     [not found]   ` <20230401232355.336-1-hdanton@sina.com>
2023-04-02  2:40     ` Mike Galbraith
2023-03-28  9:26 ` [PATCH 15/17] [RFC] sched/eevdf: Sleeper bonus Peter Zijlstra
2023-03-29  9:10   ` Mike Galbraith
2023-03-28  9:26 ` [PATCH 16/17] [RFC] sched/eevdf: Minimal vavg option Peter Zijlstra
2023-03-28  9:26 ` [PATCH 17/17] [DEBUG] sched/eevdf: Debug / validation crud Peter Zijlstra
2023-04-03  7:42 ` [PATCH 00/17] sched: EEVDF using latency-nice Shrikanth Hegde
2023-04-10  3:13 ` David Vernet
2023-04-11  2:09   ` David Vernet
     [not found] ` <20230410082307.1327-1-hdanton@sina.com>
2023-04-11 10:15   ` Mike Galbraith
     [not found]   ` <20230411133333.1790-1-hdanton@sina.com>
2023-04-11 14:56     ` Mike Galbraith
     [not found]     ` <20230412025042.1413-1-hdanton@sina.com>
2023-04-12  4:05       ` Mike Galbraith
2023-04-25 12:32 ` Phil Auld

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230328110353.631740156@infradead.org \
    --to=peterz@infradead.org \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=chris.hyser@oracle.com \
    --cc=corbet@lwn.net \
    --cc=dietmar.eggemann@arm.com \
    --cc=efault@gmx.de \
    --cc=joel@joelfernandes.org \
    --cc=joshdon@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=kprateek.nayak@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=parth@linux.ibm.com \
    --cc=patrick.bellasi@matbug.net \
    --cc=pavel@ucw.cz \
    --cc=pjt@google.com \
    --cc=qperret@google.com \
    --cc=qyousef@layalina.io \
    --cc=rostedt@goodmis.org \
    --cc=tim.c.chen@linux.intel.com \
    --cc=timj@gnu.org \
    --cc=vincent.guittot@linaro.org \
    --cc=youssefesmat@chromium.org \
    --cc=yu.c.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).