linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: torvalds@linux-foundation.org, mingo@redhat.com,
	peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	bristot@redhat.com, vschneid@redhat.com, ast@kernel.org,
	daniel@iogearbox.net, andrii@kernel.org, martin.lau@kernel.org,
	joshdon@google.com, brho@google.com, pjt@google.com,
	derkling@google.com, haoluo@google.com, dvernet@meta.com,
	dschatzberg@meta.com, dskarlat@cs.cmu.edu, riel@surriel.com
Cc: linux-kernel@vger.kernel.org, bpf@vger.kernel.org,
	kernel-team@meta.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 13/31] sched_ext: Add boilerplate for extensible scheduler class
Date: Tue, 29 Nov 2022 22:22:55 -1000	[thread overview]
Message-ID: <20221130082313.3241517-14-tj@kernel.org> (raw)
In-Reply-To: <20221130082313.3241517-1-tj@kernel.org>

This adds dummy implementations of the sched_ext interfaces which interact
with the scheduler core and hooks them into the correct places. As they're
all dummies, this doesn't cause any behavior changes. This is split out to
make reviewing easier.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Acked-by: Josh Don <joshdon@google.com>
Acked-by: Hao Luo <haoluo@google.com>
Acked-by: Barret Rhoden <brho@google.com>
---
 include/linux/sched/ext.h | 12 ++++++++++++
 kernel/fork.c             |  2 ++
 kernel/sched/core.c       | 35 +++++++++++++++++++++++++++--------
 kernel/sched/ext.h        | 34 ++++++++++++++++++++++++++++++++++
 kernel/sched/idle.c       |  2 ++
 kernel/sched/sched.h      |  2 ++
 6 files changed, 79 insertions(+), 8 deletions(-)
 create mode 100644 include/linux/sched/ext.h
 create mode 100644 kernel/sched/ext.h

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
new file mode 100644
index 000000000000..a05dfcf533b0
--- /dev/null
+++ b/include/linux/sched/ext.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_EXT_H
+#define _LINUX_SCHED_EXT_H
+
+#ifdef CONFIG_SCHED_CLASS_EXT
+#error "NOT IMPLEMENTED YET"
+#else	/* !CONFIG_SCHED_CLASS_EXT */
+
+static inline void sched_ext_free(struct task_struct *p) {}
+
+#endif	/* CONFIG_SCHED_CLASS_EXT */
+#endif	/* _LINUX_SCHED_EXT_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index a90c6a4938c6..606c6b349799 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -23,6 +23,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/ext.h>
 #include <linux/seq_file.h>
 #include <linux/rtmutex.h>
 #include <linux/init.h>
@@ -833,6 +834,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	sched_ext_free(tsk);
 	io_uring_free(tsk);
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9c5bfeeb30ba..b9e69e009343 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4554,6 +4554,8 @@ late_initcall(sched_core_sysctl_init);
  */
 int sched_fork(unsigned long clone_flags, struct task_struct *p)
 {
+	int ret;
+
 	__sched_fork(clone_flags, p);
 	/*
 	 * We mark the process as NEW here. This guarantees that
@@ -4590,12 +4592,16 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 		p->sched_reset_on_fork = 0;
 	}
 
-	if (dl_prio(p->prio))
-		return -EAGAIN;
-	else if (rt_prio(p->prio))
+	scx_pre_fork(p);
+
+	if (dl_prio(p->prio)) {
+		ret = -EAGAIN;
+		goto out_cancel;
+	} else if (rt_prio(p->prio)) {
 		p->sched_class = &rt_sched_class;
-	else
+	} else {
 		p->sched_class = &fair_sched_class;
+	}
 
 	init_entity_runnable_average(&p->se);
 
@@ -4613,6 +4619,10 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	RB_CLEAR_NODE(&p->pushable_dl_tasks);
 #endif
 	return 0;
+
+out_cancel:
+	scx_cancel_fork(p);
+	return ret;
 }
 
 int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
@@ -4643,16 +4653,18 @@ int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 		p->sched_class->task_fork(p);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
-	return 0;
+	return scx_fork(p);
 }
 
 void sched_cancel_fork(struct task_struct *p)
 {
+	scx_cancel_fork(p);
 }
 
 void sched_post_fork(struct task_struct *p)
 {
 	uclamp_post_fork(p);
+	scx_post_fork(p);
 }
 
 unsigned long to_ratio(u64 period, u64 runtime)
@@ -5800,10 +5812,13 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
 	 * We can terminate the balance pass as soon as we know there is
 	 * a runnable task of @class priority or higher.
 	 */
-	for_class_range(class, prev->sched_class, &idle_sched_class) {
+	for_balance_class_range(class, prev->sched_class, &idle_sched_class) {
 		if (class->balance(rq, prev, rf))
 			break;
 	}
+#else
+	/* SCX needs the balance call even on UP; call it explicitly */
+	balance_scx_on_up(rq, prev, rf);
 #endif
 
 	put_prev_task(rq, prev);
@@ -5818,6 +5833,9 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	const struct sched_class *class;
 	struct task_struct *p;
 
+	if (scx_enabled())
+		goto restart;
+
 	/*
 	 * Optimization: we know that if all tasks are in the fair class we can
 	 * call that function directly, but only if the @prev task wasn't of a
@@ -5843,7 +5861,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 restart:
 	put_prev_task_balance(rq, prev, rf);
 
-	for_each_class(class) {
+	for_each_active_class(class) {
 		p = class->pick_next_task(rq);
 		if (p)
 			return p;
@@ -5876,7 +5894,7 @@ static inline struct task_struct *pick_task(struct rq *rq)
 	const struct sched_class *class;
 	struct task_struct *p;
 
-	for_each_class(class) {
+	for_each_active_class(class) {
 		p = class->pick_task(rq);
 		if (p)
 			return p;
@@ -9810,6 +9828,7 @@ void __init sched_init(void)
 	balance_push_set(smp_processor_id(), false);
 #endif
 	init_sched_fair_class();
+	init_sched_ext_class();
 
 	psi_init();
 
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
new file mode 100644
index 000000000000..f348158ed33a
--- /dev/null
+++ b/kernel/sched/ext.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CONFIG_SCHED_CLASS_EXT
+#error "NOT IMPLEMENTED YET"
+#else	/* !CONFIG_SCHED_CLASS_EXT */
+
+#define scx_enabled()		false
+
+static inline void scx_pre_fork(struct task_struct *p) {}
+static inline int scx_fork(struct task_struct *p) { return 0; }
+static inline void scx_post_fork(struct task_struct *p) {}
+static inline void scx_cancel_fork(struct task_struct *p) {}
+static inline int balance_scx(struct rq *rq, struct task_struct *prev,
+			      struct rq_flags *rf) { return 0; }
+static inline void init_sched_ext_class(void) {}
+
+#define for_each_active_class		for_each_class
+#define for_balance_class_range		for_class_range
+
+#endif	/* CONFIG_SCHED_CLASS_EXT */
+
+#ifndef CONFIG_SMP
+static inline void balance_scx_on_up(struct rq *rq, struct task_struct *prev,
+				     struct rq_flags *rf)
+{
+	balance_scx(rq, prev, rf);
+}
+#endif
+
+#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
+#error "NOT IMPLEMENTED YET"
+#else
+static inline void scx_update_idle(struct rq *rq, bool idle) {}
+#endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index f26ab2675f7d..86bc5832bdc4 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -428,11 +428,13 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
 
 static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 {
+	scx_update_idle(rq, false);
 }
 
 static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
 {
 	update_idle_core(rq);
+	scx_update_idle(rq, true);
 	schedstat_inc(rq->sched_goidle);
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0741827e3541..c00c27de2a30 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3252,4 +3252,6 @@ enum cpu_cftype_id {
 extern struct cftype cpu_cftypes[CPU_CFTYPE_CNT + 1];
 #endif /* CONFIG_CGROUP_SCHED */
 
+#include "ext.h"
+
 #endif /* _KERNEL_SCHED_SCHED_H */
-- 
2.38.1


  parent reply	other threads:[~2022-11-30  8:25 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-30  8:22 [PATCHSET RFC] sched: Implement BPF extensible scheduler class Tejun Heo
2022-11-30  8:22 ` [PATCH 01/31] rhashtable: Allow rhashtable to be used from irq-safe contexts Tejun Heo
2022-11-30 16:35   ` Linus Torvalds
2022-11-30 17:00     ` Tejun Heo
2022-12-06 21:36   ` [PATCH v2 " Tejun Heo
2022-12-09 10:50     ` patchwork-bot+netdevbpf
2022-11-30  8:22 ` [PATCH 02/31] cgroup: Implement cgroup_show_cftypes() Tejun Heo
2022-11-30  8:22 ` [PATCH 03/31] BPF: Add @prog to bpf_struct_ops->check_member() Tejun Heo
2022-11-30  8:22 ` [PATCH 04/31] sched: Allow sched_cgroup_fork() to fail and introduce sched_cancel_fork() Tejun Heo
2022-12-12 11:13   ` Peter Zijlstra
2022-12-12 18:03     ` Tejun Heo
2022-12-12 20:07       ` Peter Zijlstra
2022-12-12 20:12         ` Tejun Heo
2022-11-30  8:22 ` [PATCH 05/31] sched: Add sched_class->reweight_task() Tejun Heo
2022-12-12 11:22   ` Peter Zijlstra
2022-12-12 17:34     ` Tejun Heo
2022-12-12 20:11       ` Peter Zijlstra
2022-12-12 20:15         ` Tejun Heo
2022-11-30  8:22 ` [PATCH 06/31] sched: Add sched_class->switching_to() and expose check_class_changing/changed() Tejun Heo
2022-12-12 11:28   ` Peter Zijlstra
2022-12-12 17:59     ` Tejun Heo
2022-11-30  8:22 ` [PATCH 07/31] sched: Factor out cgroup weight conversion functions Tejun Heo
2022-11-30  8:22 ` [PATCH 08/31] sched: Expose css_tg() and __setscheduler_prio() in kernel/sched/sched.h Tejun Heo
2022-12-12 11:49   ` Peter Zijlstra
2022-12-12 17:47     ` Tejun Heo
2022-11-30  8:22 ` [PATCH 09/31] sched: Enumerate CPU cgroup file types Tejun Heo
2022-11-30  8:22 ` [PATCH 10/31] sched: Add @reason to sched_class->rq_{on|off}line() Tejun Heo
2022-12-12 11:57   ` Peter Zijlstra
2022-12-12 18:06     ` Tejun Heo
2022-11-30  8:22 ` [PATCH 11/31] sched: Add @reason to sched_move_task() Tejun Heo
2022-12-12 12:00   ` Peter Zijlstra
2022-12-12 17:54     ` Tejun Heo
2022-11-30  8:22 ` [PATCH 12/31] sched: Add normal_policy() Tejun Heo
2022-11-30  8:22 ` Tejun Heo [this message]
2022-11-30  8:22 ` [PATCH 14/31] sched_ext: Implement BPF extensible scheduler class Tejun Heo
2022-12-02 17:08   ` Barret Rhoden
2022-12-02 18:01     ` Tejun Heo
2022-12-06 21:42       ` Tejun Heo
2022-12-06 21:44   ` Tejun Heo
2022-12-11 22:33   ` Julia Lawall
2022-12-12  2:15     ` Tejun Heo
2022-12-12  6:03       ` Julia Lawall
2022-12-12  6:08         ` Tejun Heo
2022-12-12 12:31   ` Peter Zijlstra
2022-12-12 20:03     ` Tejun Heo
2022-12-12 12:53   ` Peter Zijlstra
2022-12-12 21:33     ` Tejun Heo
2022-12-13 10:55       ` Peter Zijlstra
2022-12-13 18:12         ` Tejun Heo
2022-12-13 18:40           ` Rik van Riel
2022-12-13 23:20             ` Josh Don
2022-12-13 10:57       ` Peter Zijlstra
2022-12-13 17:32         ` Tejun Heo
2022-11-30  8:22 ` [PATCH 15/31] sched_ext: [TEMPORARY] Add temporary workaround kfunc helpers Tejun Heo
2022-11-30  8:22 ` [PATCH 16/31] sched_ext: Add scx_example_dummy and scx_example_qmap example schedulers Tejun Heo
2022-11-30  8:22 ` [PATCH 17/31] sched_ext: Add sysrq-S which disables the BPF scheduler Tejun Heo
2022-11-30  8:23 ` [PATCH 18/31] sched_ext: Implement runnable task stall watchdog Tejun Heo
2022-11-30  8:23 ` [PATCH 19/31] sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT Tejun Heo
2022-11-30  8:23 ` [PATCH 20/31] sched_ext: Allow BPF schedulers to switch all eligible tasks into sched_ext Tejun Heo
2022-11-30  8:23 ` [PATCH 21/31] sched_ext: Implement scx_bpf_kick_cpu() and task preemption support Tejun Heo
2022-11-30  8:23 ` [PATCH 22/31] sched_ext: Add task state tracking operations Tejun Heo
2022-11-30  8:23 ` [PATCH 23/31] sched_ext: Implement tickless support Tejun Heo
2022-11-30  8:23 ` [PATCH 24/31] sched_ext: Add cgroup support Tejun Heo
2022-11-30  8:23 ` [PATCH 25/31] sched_ext: Implement SCX_KICK_WAIT Tejun Heo
2022-11-30  8:23 ` [PATCH 26/31] sched_ext: Implement sched_ext_ops.cpu_acquire/release() Tejun Heo
2022-11-30  8:23 ` [PATCH 27/31] sched_ext: Implement sched_ext_ops.cpu_online/offline() Tejun Heo
2022-11-30  8:23 ` [PATCH 28/31] sched_ext: Add Documentation/scheduler/sched-ext.rst Tejun Heo
2022-12-12  4:01   ` Bagas Sanjaya
2022-12-12  6:28     ` Tejun Heo
2022-12-12 13:07       ` Bagas Sanjaya
2022-12-12 17:30         ` Tejun Heo
2022-12-12 12:39   ` Peter Zijlstra
2022-12-12 17:16     ` Tejun Heo
2022-11-30  8:23 ` [PATCH 29/31] sched_ext: Add a basic, userland vruntime scheduler Tejun Heo
2022-11-30  8:23 ` [PATCH 30/31] BPF: [TEMPORARY] Nerf BTF scalar value check Tejun Heo
2022-11-30  8:23 ` [PATCH 31/31] sched_ext: Add a rust userspace hybrid example scheduler Tejun Heo
2022-12-12 14:03   ` Peter Zijlstra
2022-12-12 21:05     ` Peter Oskolkov
2022-12-13 11:02       ` Peter Zijlstra
2022-12-13 18:24         ` Peter Oskolkov
2022-12-12 22:00     ` Tejun Heo
2022-12-12 22:18     ` Josh Don
2022-12-13 11:30       ` Peter Zijlstra
2022-12-13 20:33         ` Tejun Heo
2022-12-14  2:00         ` Josh Don
2022-12-12  9:37 ` [PATCHSET RFC] sched: Implement BPF extensible scheduler class Peter Zijlstra
2022-12-12 17:27   ` Tejun Heo
2022-12-12 10:14 ` Peter Zijlstra
2022-12-14  2:11   ` Josh Don
2022-12-14  8:55     ` Peter Zijlstra
2022-12-14 22:23       ` Tejun Heo
2022-12-14 23:20         ` Barret Rhoden

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221130082313.3241517-14-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brho@google.com \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=daniel@iogearbox.net \
    --cc=derkling@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=dschatzberg@meta.com \
    --cc=dskarlat@cs.cmu.edu \
    --cc=dvernet@meta.com \
    --cc=haoluo@google.com \
    --cc=joshdon@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin.lau@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=riel@surriel.com \
    --cc=rostedt@goodmis.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).