All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: joel@joelfernandes.org, chris.hyser@oracle.com,
	joshdon@google.com, mingo@kernel.org, vincent.guittot@linaro.org,
	valentin.schneider@arm.com, mgorman@suse.de
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org,
	tj@kernel.org, tglx@linutronix.de
Subject: [PATCH 7/9] sched: Cgroup core-scheduling interface
Date: Thu, 01 Apr 2021 15:10:19 +0200	[thread overview]
Message-ID: <20210401133917.469929784@infradead.org> (raw)
In-Reply-To: 20210401131012.395311786@infradead.org

Implement a basic cgroup core-scheduling interface.

A new cpu.core_sched file is added which takes the values 0,1. When
set, the cgroup and all its descendants will be granted the same
cookie and thus allowed to share a core with each other, but not with
system tasks or tasks of other subtrees that might have another
cookie.

The file is hierarchical, and a subtree can again set it to 1, in
which case that subtree will get a different cookie and will no longer
share with the parent tree.

For each task, the nearest core_sched parent 'wins'.

Interaction with the prctl() interface is non-existent and left for a
future patch.

Notably, this patch somewhat abuses cgroup_mutex. By holding
cgroup_mutex over the write() operation, which sets the cookie, the
cookie is stable in any cgroup callback (that is called with
cgroup_mutex held). A future patch relies on ss->can_attach() and
ss->attach() being 'atomic', which is hard to do without cgroup_mutex.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/core.c  |  150 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |    7 ++
 2 files changed, 157 insertions(+)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5688,10 +5688,53 @@ static inline void sched_core_cpu_starti
 		}
 	}
 }
+
+void sched_core_cgroup_online(struct task_group *parent, struct task_group *tg)
+{
+	lockdep_assert_held(&cgroup_mutex);
+
+	if (parent->core_parent) {
+		WARN_ON_ONCE(parent->core_cookie);
+		WARN_ON_ONCE(!parent->core_parent->core_cookie);
+		tg->core_parent = parent->core_parent;
+
+	} else if (parent->core_cookie) {
+		WARN_ON_ONCE(parent->core_parent);
+		tg->core_parent = parent;
+	}
+}
+
+void sched_core_cgroup_free(struct task_group *tg)
+{
+	sched_core_put_cookie(tg->core_cookie);
+}
+
+unsigned long sched_core_cgroup_cookie(struct task_group *tg)
+{
+	unsigned long cookie = 0;
+
+	lockdep_assert_held(&cgroup_mutex);
+
+	if (tg->core_cookie)
+		cookie = tg->core_cookie;
+	else if (tg->core_parent)
+		cookie = tg->core_parent->core_cookie;
+
+	return sched_core_get_cookie(cookie);
+}
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline void sched_core_cpu_starting(unsigned int cpu) {}
 
+static inline void sched_core_cgroup_free(struct task_group *tg) { }
+static inline void sched_core_cgroup_online(struct task_group *parent, struct task_group *tg) { }
+
+static inline unsigned long sched_core_cgroup_cookie(struct task_group *tg)
+{
+	return 0;
+}
+
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -9310,6 +9353,7 @@ static void sched_free_group(struct task
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
 	autogroup_free(tg);
+	sched_core_cgroup_free(tg);
 	kmem_cache_free(task_group_cache, tg);
 }
 
@@ -9353,6 +9397,8 @@ void sched_online_group(struct task_grou
 	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	online_fair_sched_group(tg);
+
+	sched_core_cgroup_online(parent, tg);
 }
 
 /* rcu callback to free various structures associated with a task group */
@@ -9414,6 +9460,7 @@ void sched_move_task(struct task_struct
 {
 	int queued, running, queue_flags =
 		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+	unsigned long cookie;
 	struct rq_flags rf;
 	struct rq *rq;
 
@@ -9443,6 +9490,10 @@ void sched_move_task(struct task_struct
 	}
 
 	task_rq_unlock(rq, tsk, &rf);
+
+	cookie = sched_core_cgroup_cookie(tsk->sched_task_group);
+	cookie = sched_core_update_cookie(tsk, cookie);
+	sched_core_put_cookie(cookie);
 }
 
 static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
@@ -10050,6 +10101,89 @@ static u64 cpu_rt_period_read_uint(struc
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+#ifdef CONFIG_SCHED_CORE
+u64 cpu_sched_core_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return !!css_tg(css)->core_cookie;
+}
+
+int cpu_sched_core_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, u64 val)
+{
+	unsigned long cookie = 0, old_cookie = 0;
+	struct task_group *tg = css_tg(css);
+	struct cgroup_subsys_state *cssi;
+	struct task_group *parent = NULL;
+	int ret = 0;
+
+	if (val > 1)
+		return -ERANGE;
+
+	if (!static_branch_likely(&sched_smt_present))
+		return -ENODEV;
+
+	mutex_lock(&cgroup_mutex);
+	if (!!val == !!tg->core_cookie)
+		goto unlock;
+
+	old_cookie = tg->core_cookie;
+	if (val) {
+		cookie = sched_core_alloc_cookie();
+		if (!cookie) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+		WARN_ON_ONCE(old_cookie);
+
+	} else if (tg->parent) {
+		if (tg->parent->core_parent)
+			parent = tg->parent->core_parent;
+		else if (tg->parent->core_cookie)
+			parent = tg->parent;
+	}
+
+	WARN_ON_ONCE(cookie && parent);
+
+	tg->core_cookie = sched_core_get_cookie(cookie);
+	tg->core_parent = parent;
+
+	if (cookie)
+		parent = tg;
+	else if (parent)
+		cookie = sched_core_get_cookie(parent->core_cookie);
+
+	css_for_each_descendant_pre(cssi, css) {
+		struct task_group *tgi = css_tg(cssi);
+		struct css_task_iter it;
+		struct task_struct *p;
+
+		if (tgi != tg) {
+			if (tgi->core_cookie || (tgi->core_parent && tgi->core_parent != tg))
+				continue;
+
+			tgi->core_parent = parent;
+			tgi->core_cookie = 0;
+		}
+
+		css_task_iter_start(cssi, 0, &it);
+		while ((p = css_task_iter_next(&it))) {
+			unsigned long p_cookie;
+
+			cookie = sched_core_get_cookie(cookie);
+			p_cookie = sched_core_update_cookie(p, cookie);
+			sched_core_put_cookie(p_cookie);
+		}
+		css_task_iter_end(&it);
+	}
+
+unlock:
+	mutex_unlock(&cgroup_mutex);
+
+	sched_core_put_cookie(cookie);
+	sched_core_put_cookie(old_cookie);
+	return ret;
+}
+#endif
+
 static struct cftype cpu_legacy_files[] = {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	{
@@ -10100,6 +10234,14 @@ static struct cftype cpu_legacy_files[]
 		.write = cpu_uclamp_max_write,
 	},
 #endif
+#ifdef CONFIG_SCHED_CORE
+	{
+		.name = "core_sched",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = cpu_sched_core_read_u64,
+		.write_u64 = cpu_sched_core_write_u64,
+	},
+#endif
 	{ }	/* Terminate */
 };
 
@@ -10281,6 +10423,14 @@ static struct cftype cpu_files[] = {
 		.write = cpu_uclamp_max_write,
 	},
 #endif
+#ifdef CONFIG_SCHED_CORE
+	{
+		.name = "core_sched",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = cpu_sched_core_read_u64,
+		.write_u64 = cpu_sched_core_write_u64,
+	},
+#endif
 	{ }	/* terminate */
 };
 
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -431,6 +431,10 @@ struct task_group {
 	struct uclamp_se	uclamp[UCLAMP_CNT];
 #endif
 
+#ifdef CONFIG_SCHED_CORE
+	struct task_group	*core_parent;
+	unsigned long		core_cookie;
+#endif
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1130,6 +1134,9 @@ static inline bool is_migration_disabled
 
 struct sched_group;
 #ifdef CONFIG_SCHED_CORE
+
+extern struct mutex cgroup_mutex; // XXX
+
 DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
 static inline struct cpumask *sched_group_span(struct sched_group *sg);
 



  parent reply	other threads:[~2021-04-01 17:51 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-01 13:10 [PATCH 0/9] sched: Core scheduling interfaces Peter Zijlstra
2021-04-01 13:10 ` [PATCH 1/9] sched: Allow sched_core_put() from atomic context Peter Zijlstra
2021-04-01 13:10 ` [PATCH 2/9] sched: Implement core-sched assertions Peter Zijlstra
2021-04-01 13:10 ` [PATCH 3/9] sched: Trivial core scheduling cookie management Peter Zijlstra
2021-04-01 20:04   ` Josh Don
2021-04-02  7:13     ` Peter Zijlstra
2021-04-01 13:10 ` [PATCH 4/9] sched: Default core-sched policy Peter Zijlstra
2021-04-21 13:33   ` Peter Zijlstra
2021-04-21 14:31     ` Chris Hyser
2021-04-01 13:10 ` [PATCH 5/9] sched: prctl() core-scheduling interface Peter Zijlstra
2021-04-07 17:00   ` Peter Zijlstra
2021-04-18  3:52     ` Joel Fernandes
2021-04-01 13:10 ` [PATCH 6/9] kselftest: Add test for core sched prctl interface Peter Zijlstra
2021-04-01 13:10 ` Peter Zijlstra [this message]
2021-04-02  0:34   ` [PATCH 7/9] sched: Cgroup core-scheduling interface Josh Don
2021-04-01 13:10 ` [PATCH 8/9] rbtree: Remove const from the rb_find_add() comparator Peter Zijlstra
2021-04-01 13:10 ` [PATCH 9/9] sched: prctl() and cgroup interaction Peter Zijlstra
2021-04-03  1:30   ` Josh Don
2021-04-06 15:12     ` Peter Zijlstra
2021-04-04 23:39 ` [PATCH 0/9] sched: Core scheduling interfaces Tejun Heo
2021-04-05 18:46   ` Joel Fernandes
2021-04-06 14:16     ` Tejun Heo
2021-04-18  1:35       ` Joel Fernandes
2021-04-19  9:00         ` Peter Zijlstra
2021-04-21 13:35           ` Peter Zijlstra
2021-04-21 14:45             ` Chris Hyser
2021-04-06 15:32   ` Peter Zijlstra
2021-04-06 16:08     ` Tejun Heo
2021-04-07 18:39       ` Peter Zijlstra
2021-04-07 16:50   ` Michal Koutný
2021-04-07 18:34     ` Peter Zijlstra
2021-04-08 13:25       ` Michal Koutný
2021-04-08 15:02         ` Peter Zijlstra
2021-04-09  0:16           ` Josh Don
2021-04-19 11:30       ` Tejun Heo
2021-04-20  1:17         ` Josh Don

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210401133917.469929784@infradead.org \
    --to=peterz@infradead.org \
    --cc=chris.hyser@oracle.com \
    --cc=joel@joelfernandes.org \
    --cc=joshdon@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.