linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: joel@joelfernandes.org, chris.hyser@oracle.com,
	joshdon@google.com, mingo@kernel.org, vincent.guittot@linaro.org,
	valentin.schneider@arm.com, mgorman@suse.de
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org,
	tj@kernel.org, tglx@linutronix.de
Subject: [PATCH 7/9] sched: Cgroup core-scheduling interface
Date: Thu, 01 Apr 2021 15:10:19 +0200	[thread overview]
Message-ID: <20210401133917.469929784@infradead.org> (raw)
In-Reply-To: <20210401131012.395311786@infradead.org>

Implement a basic cgroup core-scheduling interface.

A new cpu.core_sched file is added which takes the values 0,1. When
set, the cgroup and all its descendants will be granted the same
cookie and thus allowed to share a core with each other, but not with
system tasks or tasks of other subtrees that might have another
cookie.

The file is hierarchical, and a subtree can again set it to 1, in
which case that subtree will get a different cookie and will no longer
share with the parent tree.

For each task, the nearest core_sched parent 'wins'.

Interaction with the prctl() interface is non-existent and left for a
future patch.

Notably, this patch somewhat abuses cgroup_mutex. By holding
cgroup_mutex over the write() operation, which sets the cookie, the
cookie is stable in any cgroup callback (that is called with
cgroup_mutex held). A future patch relies on ss->can_attach() and
ss->attach() being 'atomic', which is hard to do without cgroup_mutex.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/core.c  |  150 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |    7 ++
 2 files changed, 157 insertions(+)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5688,10 +5688,53 @@ static inline void sched_core_cpu_starti
 		}
 	}
 }
+
+void sched_core_cgroup_online(struct task_group *parent, struct task_group *tg)
+{
+	lockdep_assert_held(&cgroup_mutex);
+	/* Point @tg at the group whose cookie it should inherit, if @parent has or inherits one. */
+	if (parent->core_parent) {
+		WARN_ON_ONCE(parent->core_cookie);
+		WARN_ON_ONCE(!parent->core_parent->core_cookie);
+		tg->core_parent = parent->core_parent;	/* @parent inherits too: reference the same group */
+
+	} else if (parent->core_cookie) {
+		WARN_ON_ONCE(parent->core_parent);
+		tg->core_parent = parent;	/* @parent holds its own cookie: reference it directly */
+	}
+}
+
+void sched_core_cgroup_free(struct task_group *tg)
+{
+	sched_core_put_cookie(tg->core_cookie);	/* put the cookie stored in @tg; called from sched_free_group() */
+}
+
+unsigned long sched_core_cgroup_cookie(struct task_group *tg)
+{
+	unsigned long cookie = 0;	/* stays 0 when @tg has no cookie and no core_parent */
+
+	lockdep_assert_held(&cgroup_mutex);
+	/* The group's own cookie takes precedence over the one reachable via core_parent. */
+	if (tg->core_cookie)
+		cookie = tg->core_cookie;
+	else if (tg->core_parent)
+		cookie = tg->core_parent->core_cookie;
+
+	return sched_core_get_cookie(cookie);	/* presumably takes a reference for the caller -- confirm */
+}
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline void sched_core_cpu_starting(unsigned int cpu) {}
 
+static inline void sched_core_cgroup_free(struct task_group *tg) { }
+static inline void sched_core_cgroup_online(struct task_group *parent, struct task_group *tg) { }
+/* Without CONFIG_SCHED_CORE task groups carry no cookie fields, so the lookup is always 0. */
+static inline unsigned long sched_core_cgroup_cookie(struct task_group *tg)
+{
+	return 0;
+}
+
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -9310,6 +9353,7 @@ static void sched_free_group(struct task
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
 	autogroup_free(tg);
+	sched_core_cgroup_free(tg);
 	kmem_cache_free(task_group_cache, tg);
 }
 
@@ -9353,6 +9397,8 @@ void sched_online_group(struct task_grou
 	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	online_fair_sched_group(tg);
+
+	sched_core_cgroup_online(parent, tg);
 }
 
 /* rcu callback to free various structures associated with a task group */
@@ -9414,6 +9460,7 @@ void sched_move_task(struct task_struct
 {
 	int queued, running, queue_flags =
 		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+	unsigned long cookie;
 	struct rq_flags rf;
 	struct rq *rq;
 
@@ -9443,6 +9490,10 @@ void sched_move_task(struct task_struct
 	}
 
 	task_rq_unlock(rq, tsk, &rf);
+
+	cookie = sched_core_cgroup_cookie(tsk->sched_task_group);
+	cookie = sched_core_update_cookie(tsk, cookie);
+	sched_core_put_cookie(cookie);
 }
 
 static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
@@ -10050,6 +10101,89 @@ static u64 cpu_rt_period_read_uint(struc
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+#ifdef CONFIG_SCHED_CORE
+u64 cpu_sched_core_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return !!css_tg(css)->core_cookie;	/* 1 only if this group has its own cookie; core_parent is not consulted */
+}
+
+int cpu_sched_core_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, u64 val)
+{
+	unsigned long cookie = 0, old_cookie = 0;
+	struct task_group *tg = css_tg(css);
+	struct cgroup_subsys_state *cssi;
+	struct task_group *parent = NULL, *old_parent;
+	int ret = 0;
+	/* cpu.core_sched write: 1 gives this group its own cookie, 0 removes it again. */
+	if (val > 1)
+		return -ERANGE;
+
+	if (!static_branch_likely(&sched_smt_present))
+		return -ENODEV;
+
+	mutex_lock(&cgroup_mutex);
+	if (!!val == !!tg->core_cookie)
+		goto unlock;	/* already in the requested state */
+	old_parent = val ? tg->core_parent : tg;	/* what tg's cookie-less descendants reference now */
+	old_cookie = tg->core_cookie;
+	if (val) {
+		cookie = sched_core_alloc_cookie();
+		if (!cookie) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+		WARN_ON_ONCE(old_cookie);
+
+	} else if (tg->parent) {
+		if (tg->parent->core_parent)
+			parent = tg->parent->core_parent;
+		else if (tg->parent->core_cookie)
+			parent = tg->parent;
+	}
+
+	WARN_ON_ONCE(cookie && parent);
+
+	tg->core_cookie = sched_core_get_cookie(cookie);
+	tg->core_parent = parent;
+
+	if (cookie)
+		parent = tg;
+	else if (parent)
+		cookie = sched_core_get_cookie(parent->core_cookie);
+	/* Retarget descendants that followed tg's previous cookie source and re-cookie their tasks. */
+	css_for_each_descendant_pre(cssi, css) {
+		struct task_group *tgi = css_tg(cssi);
+		struct css_task_iter it;
+		struct task_struct *p;
+
+		if (tgi != tg) {
+			if (tgi->core_cookie || tgi->core_parent != old_parent)
+				continue;	/* owns a cookie, or follows a cookie owner nested below tg */
+
+			tgi->core_parent = parent;
+			tgi->core_cookie = 0;
+		}
+
+		css_task_iter_start(cssi, 0, &it);
+		while ((p = css_task_iter_next(&it))) {
+			unsigned long p_cookie;
+
+			cookie = sched_core_get_cookie(cookie);
+			p_cookie = sched_core_update_cookie(p, cookie);
+			sched_core_put_cookie(p_cookie);
+		}
+		css_task_iter_end(&it);
+	}
+
+unlock:
+	mutex_unlock(&cgroup_mutex);
+
+	sched_core_put_cookie(cookie);
+	sched_core_put_cookie(old_cookie);
+	return ret;
+}
+#endif
+
 static struct cftype cpu_legacy_files[] = {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	{
@@ -10100,6 +10234,14 @@ static struct cftype cpu_legacy_files[]
 		.write = cpu_uclamp_max_write,
 	},
 #endif
+#ifdef CONFIG_SCHED_CORE
+	{
+		.name = "core_sched",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = cpu_sched_core_read_u64,
+		.write_u64 = cpu_sched_core_write_u64,
+	},
+#endif
 	{ }	/* Terminate */
 };
 
@@ -10281,6 +10423,14 @@ static struct cftype cpu_files[] = {
 		.write = cpu_uclamp_max_write,
 	},
 #endif
+#ifdef CONFIG_SCHED_CORE
+	{
+		.name = "core_sched",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = cpu_sched_core_read_u64,
+		.write_u64 = cpu_sched_core_write_u64,
+	},
+#endif
 	{ }	/* terminate */
 };
 
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -431,6 +431,10 @@ struct task_group {
 	struct uclamp_se	uclamp[UCLAMP_CNT];
 #endif
 
+#ifdef CONFIG_SCHED_CORE
+	struct task_group	*core_parent;
+	unsigned long		core_cookie;
+#endif
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1130,6 +1134,9 @@ static inline bool is_migration_disabled
 
 struct sched_group;
 #ifdef CONFIG_SCHED_CORE
+
+extern struct mutex cgroup_mutex; // XXX
+
 DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
 static inline struct cpumask *sched_group_span(struct sched_group *sg);
 



  parent reply	other threads:[~2021-04-01 17:51 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-01 13:10 [PATCH 0/9] sched: Core scheduling interfaces Peter Zijlstra
2021-04-01 13:10 ` [PATCH 1/9] sched: Allow sched_core_put() from atomic context Peter Zijlstra
2021-04-01 13:10 ` [PATCH 2/9] sched: Implement core-sched assertions Peter Zijlstra
2021-04-01 13:10 ` [PATCH 3/9] sched: Trivial core scheduling cookie management Peter Zijlstra
2021-04-01 20:04   ` Josh Don
2021-04-02  7:13     ` Peter Zijlstra
2021-04-01 13:10 ` [PATCH 4/9] sched: Default core-sched policy Peter Zijlstra
2021-04-21 13:33   ` Peter Zijlstra
2021-04-21 14:31     ` Chris Hyser
2021-04-01 13:10 ` [PATCH 5/9] sched: prctl() core-scheduling interface Peter Zijlstra
2021-04-07 17:00   ` Peter Zijlstra
2021-04-18  3:52     ` Joel Fernandes
2021-04-01 13:10 ` [PATCH 6/9] kselftest: Add test for core sched prctl interface Peter Zijlstra
2021-04-01 13:10 ` Peter Zijlstra [this message]
2021-04-02  0:34   ` [PATCH 7/9] sched: Cgroup core-scheduling interface Josh Don
2021-04-01 13:10 ` [PATCH 8/9] rbtree: Remove const from the rb_find_add() comparator Peter Zijlstra
2021-04-01 13:10 ` [PATCH 9/9] sched: prctl() and cgroup interaction Peter Zijlstra
2021-04-03  1:30   ` Josh Don
2021-04-06 15:12     ` Peter Zijlstra
2021-04-04 23:39 ` [PATCH 0/9] sched: Core scheduling interfaces Tejun Heo
2021-04-05 18:46   ` Joel Fernandes
2021-04-06 14:16     ` Tejun Heo
2021-04-18  1:35       ` Joel Fernandes
2021-04-19  9:00         ` Peter Zijlstra
2021-04-21 13:35           ` Peter Zijlstra
2021-04-21 14:45             ` Chris Hyser
2021-04-06 15:32   ` Peter Zijlstra
2021-04-06 16:08     ` Tejun Heo
2021-04-07 18:39       ` Peter Zijlstra
2021-04-07 16:50   ` Michal Koutný
2021-04-07 18:34     ` Peter Zijlstra
2021-04-08 13:25       ` Michal Koutný
2021-04-08 15:02         ` Peter Zijlstra
2021-04-09  0:16           ` Josh Don
2021-04-19 11:30       ` Tejun Heo
2021-04-20  1:17         ` Josh Don

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210401133917.469929784@infradead.org \
    --to=peterz@infradead.org \
    --cc=chris.hyser@oracle.com \
    --cc=joel@joelfernandes.org \
    --cc=joshdon@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    --subject='Re: [PATCH 7/9] sched: Cgroup core-scheduling interface' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
on how to clone and mirror all data and code used for this inbox