All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2
@ 2023-04-17 15:36 Waiman Long
  2023-04-17 15:36 ` [PATCH 5.15 2/3] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly Waiman Long
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Waiman Long @ 2023-04-17 15:36 UTC (permalink / raw)
  To: Greg Kroah-Hartman, stable
  Cc: Tejun Heo, Michal Koutný, Giuseppe Scrivano, Waiman Long

commit 18f9a4d47527772515ad6cbdac796422566e6440 upstream.

Cpuset v2 has no spread flags to set. So we can skip spread
flags update if cpuset v2 is being used. Also change the name to
cpuset_update_task_spread_flags() to indicate that there are multiple
spread flags.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cpuset.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 428820bf141d..a7bbe277ae66 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -450,11 +450,15 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 /*
  * update task's spread flag if cpuset's page/slab spread flag is set
  *
- * Call with callback_lock or cpuset_rwsem held.
+ * Call with callback_lock or cpuset_rwsem held. The check can be skipped
+ * if on default hierarchy.
  */
-static void cpuset_update_task_spread_flag(struct cpuset *cs,
+static void cpuset_update_task_spread_flags(struct cpuset *cs,
 					struct task_struct *tsk)
 {
+	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+		return;
+
 	if (is_spread_page(cs))
 		task_set_spread_page(tsk);
 	else
@@ -1941,7 +1945,7 @@ static void update_tasks_flags(struct cpuset *cs)
 
 	css_task_iter_start(&cs->css, 0, &it);
 	while ((task = css_task_iter_next(&it)))
-		cpuset_update_task_spread_flag(cs, task);
+		cpuset_update_task_spread_flags(cs, task);
 	css_task_iter_end(&it);
 }
 
@@ -2270,7 +2274,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
 		WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
 
 		cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-		cpuset_update_task_spread_flag(cs, task);
+		cpuset_update_task_spread_flags(cs, task);
 	}
 
 	/*
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 5.15 2/3] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly
  2023-04-17 15:36 [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2 Waiman Long
@ 2023-04-17 15:36 ` Waiman Long
  2023-04-17 15:36 ` [PATCH 5.15 3/3] cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods Waiman Long
  2023-04-19  7:20 ` [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2 Greg Kroah-Hartman
  2 siblings, 0 replies; 4+ messages in thread
From: Waiman Long @ 2023-04-17 15:36 UTC (permalink / raw)
  To: Greg Kroah-Hartman, stable
  Cc: Tejun Heo, Michal Koutný, Giuseppe Scrivano, Waiman Long

commit 42a11bf5c5436e91b040aeb04063be1710bb9f9c upstream.

By default, the clone(2) syscall spawn a child process into the same
cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
into cgroups"), the child will be spawned into a different cgroup which
is somewhat similar to writing the child's tid into "cgroup.threads".

The current cpuset_fork() method does not properly handle the
CLONE_INTO_CGROUP case where the cpuset of the child may be different
from that of its parent.  Update the cpuset_fork() method to treat the
CLONE_INTO_CGROUP case similar to cpuset_attach().

Since the newly cloned task has not been running yet, its actual
memory usage isn't known. So it is not necessary to make change to mm
in cpuset_fork().

Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
Reported-by: Giuseppe Scrivano <gscrivan@redhat.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cpuset.c | 62 ++++++++++++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index a7bbe277ae66..3a7690450a3b 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2238,16 +2238,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
 }
 
 /*
- * Protected by cpuset_rwsem.  cpus_attach is used only by cpuset_attach()
+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
  * but we can't allocate it dynamically there.  Define it global and
  * allocate from cpuset_init().
  */
 static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_to;
+
+static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
+{
+	percpu_rwsem_assert_held(&cpuset_rwsem);
+
+	if (cs != &top_cpuset)
+		guarantee_online_cpus(task, cpus_attach);
+	else
+		cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
+	/*
+	 * can_attach beforehand should guarantee that this doesn't
+	 * fail.  TODO: have a better way to handle failure here
+	 */
+	WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+
+	cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+	cpuset_update_task_spread_flags(cs, task);
+}
 
 static void cpuset_attach(struct cgroup_taskset *tset)
 {
-	/* static buf protected by cpuset_rwsem */
-	static nodemask_t cpuset_attach_nodemask_to;
 	struct task_struct *task;
 	struct task_struct *leader;
 	struct cgroup_subsys_state *css;
@@ -2262,20 +2279,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
 
 	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
 
-	cgroup_taskset_for_each(task, css, tset) {
-		if (cs != &top_cpuset)
-			guarantee_online_cpus(task, cpus_attach);
-		else
-			cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
-		/*
-		 * can_attach beforehand should guarantee that this doesn't
-		 * fail.  TODO: have a better way to handle failure here
-		 */
-		WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
-		cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-		cpuset_update_task_spread_flags(cs, task);
-	}
+	cgroup_taskset_for_each(task, css, tset)
+		cpuset_attach_task(cs, task);
 
 	/*
 	 * Change mm for all threadgroup leaders. This is expensive and may
@@ -2948,11 +2953,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
  */
 static void cpuset_fork(struct task_struct *task)
 {
-	if (task_css_is_root(task, cpuset_cgrp_id))
+	struct cpuset *cs;
+	bool same_cs;
+
+	rcu_read_lock();
+	cs = task_cs(task);
+	same_cs = (cs == task_cs(current));
+	rcu_read_unlock();
+
+	if (same_cs) {
+		if (cs == &top_cpuset)
+			return;
+
+		set_cpus_allowed_ptr(task, current->cpus_ptr);
+		task->mems_allowed = current->mems_allowed;
 		return;
+	}
 
-	set_cpus_allowed_ptr(task, current->cpus_ptr);
-	task->mems_allowed = current->mems_allowed;
+	/* CLONE_INTO_CGROUP */
+	percpu_down_write(&cpuset_rwsem);
+	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+	cpuset_attach_task(cs, task);
+	percpu_up_write(&cpuset_rwsem);
 }
 
 struct cgroup_subsys cpuset_cgrp_subsys = {
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 5.15 3/3] cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
  2023-04-17 15:36 [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2 Waiman Long
  2023-04-17 15:36 ` [PATCH 5.15 2/3] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly Waiman Long
@ 2023-04-17 15:36 ` Waiman Long
  2023-04-19  7:20 ` [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2 Greg Kroah-Hartman
  2 siblings, 0 replies; 4+ messages in thread
From: Waiman Long @ 2023-04-17 15:36 UTC (permalink / raw)
  To: Greg Kroah-Hartman, stable
  Cc: Tejun Heo, Michal Koutný, Giuseppe Scrivano, Waiman Long

commit eee87853794187f6adbe19533ed79c8b44b36a91 upstream.

In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
new tasks. It is too late to check that in cpuset_fork(). So we need
to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
pre-check it before we can allow attachment to a different cpuset.

We also need to set the attach_in_progress flag to alert other code
that a new task is going to be added to the cpuset.

Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
Suggested-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cpuset.c | 88 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 83 insertions(+), 5 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 3a7690450a3b..58a515b4cbc0 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2186,6 +2186,18 @@ static int fmeter_getrate(struct fmeter *fmp)
 
 static struct cpuset *cpuset_attach_old_cs;
 
+/*
+ * Check to see if a cpuset can accept a new task
+ * For v1, cpus_allowed and mems_allowed can't be empty.
+ */
+static int cpuset_can_attach_check(struct cpuset *cs)
+{
+	if (!is_in_v2_mode() &&
+	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
+		return -ENOSPC;
+	return 0;
+}
+
 /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
 static int cpuset_can_attach(struct cgroup_taskset *tset)
 {
@@ -2200,10 +2212,8 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 
 	percpu_down_write(&cpuset_rwsem);
 
-	/* allow moving tasks into an empty cpuset if on default hierarchy */
-	ret = -ENOSPC;
-	if (!is_in_v2_mode() &&
-	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
+	ret = cpuset_can_attach_check(cs);
+	if (ret)
 		goto out_unlock;
 
 	cgroup_taskset_for_each(task, css, tset) {
@@ -2220,7 +2230,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 	 * changes which zero cpus/mems_allowed.
 	 */
 	cs->attach_in_progress++;
-	ret = 0;
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
 	return ret;
@@ -2946,6 +2955,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
 	percpu_up_write(&cpuset_rwsem);
 }
 
+/*
+ * In case the child is cloned into a cpuset different from its parent,
+ * additional checks are done to see if the move is allowed.
+ */
+static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
+{
+	struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+	bool same_cs;
+	int ret;
+
+	rcu_read_lock();
+	same_cs = (cs == task_cs(current));
+	rcu_read_unlock();
+
+	if (same_cs)
+		return 0;
+
+	lockdep_assert_held(&cgroup_mutex);
+	percpu_down_write(&cpuset_rwsem);
+
+	/* Check to see if task is allowed in the cpuset */
+	ret = cpuset_can_attach_check(cs);
+	if (ret)
+		goto out_unlock;
+
+	ret = task_can_attach(task, cs->effective_cpus);
+	if (ret)
+		goto out_unlock;
+
+	ret = security_task_setscheduler(task);
+	if (ret)
+		goto out_unlock;
+
+	/*
+	 * Mark attach is in progress.  This makes validate_change() fail
+	 * changes which zero cpus/mems_allowed.
+	 */
+	cs->attach_in_progress++;
+out_unlock:
+	percpu_up_write(&cpuset_rwsem);
+	return ret;
+}
+
+static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
+{
+	struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+	bool same_cs;
+
+	rcu_read_lock();
+	same_cs = (cs == task_cs(current));
+	rcu_read_unlock();
+
+	if (same_cs)
+		return;
+
+	percpu_down_write(&cpuset_rwsem);
+	cs->attach_in_progress--;
+	if (!cs->attach_in_progress)
+		wake_up(&cpuset_attach_wq);
+	percpu_up_write(&cpuset_rwsem);
+}
+
 /*
  * Make sure the new task conform to the current state of its parent,
  * which could have been changed by cpuset just after it inherits the
@@ -2974,6 +3045,11 @@ static void cpuset_fork(struct task_struct *task)
 	percpu_down_write(&cpuset_rwsem);
 	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
 	cpuset_attach_task(cs, task);
+
+	cs->attach_in_progress--;
+	if (!cs->attach_in_progress)
+		wake_up(&cpuset_attach_wq);
+
 	percpu_up_write(&cpuset_rwsem);
 }
 
@@ -2987,6 +3063,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
 	.attach		= cpuset_attach,
 	.post_attach	= cpuset_post_attach,
 	.bind		= cpuset_bind,
+	.can_fork	= cpuset_can_fork,
+	.cancel_fork	= cpuset_cancel_fork,
 	.fork		= cpuset_fork,
 	.legacy_cftypes	= legacy_files,
 	.dfl_cftypes	= dfl_files,
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2
  2023-04-17 15:36 [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2 Waiman Long
  2023-04-17 15:36 ` [PATCH 5.15 2/3] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly Waiman Long
  2023-04-17 15:36 ` [PATCH 5.15 3/3] cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods Waiman Long
@ 2023-04-19  7:20 ` Greg Kroah-Hartman
  2 siblings, 0 replies; 4+ messages in thread
From: Greg Kroah-Hartman @ 2023-04-19  7:20 UTC (permalink / raw)
  To: Waiman Long; +Cc: stable, Tejun Heo, Michal Koutný, Giuseppe Scrivano

On Mon, Apr 17, 2023 at 11:36:45AM -0400, Waiman Long wrote:
> commit 18f9a4d47527772515ad6cbdac796422566e6440 upstream.
> 
> Cpuset v2 has no spread flags to set. So we can skip spread
> flags update if cpuset v2 is being used. Also change the name to
> cpuset_update_task_spread_flags() to indicate that there are multiple
> spread flags.
> 
> Signed-off-by: Waiman Long <longman@redhat.com>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> ---
>  kernel/cgroup/cpuset.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 

Same as the 5.10.y series, I've dropped all of these due to build
errors.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-04-19  7:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-17 15:36 [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2 Waiman Long
2023-04-17 15:36 ` [PATCH 5.15 2/3] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly Waiman Long
2023-04-17 15:36 ` [PATCH 5.15 3/3] cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods Waiman Long
2023-04-19  7:20 ` [PATCH 5.15 1/3] cgroup/cpuset: Skip spread flags update on v2 Greg Kroah-Hartman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.