linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC] cpuset: Make cpusets get restored on hotplug
@ 2020-03-26 19:16 Joel Fernandes (Google)
  2020-03-26 19:20 ` Tejun Heo
  0 siblings, 1 reply; 17+ messages in thread
From: Joel Fernandes (Google) @ 2020-03-26 19:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google),
	Dmitry Shmidt, Amit Pundir, kernel-team, jsbarnes, sonnyrao,
	vpillai, peterz, Guenter Roeck, Waiman Long, Greg Kerr, cgroups,
	Johannes Weiner, Li Zefan, Tejun Heo

This deliberately changes the behavior of the per-cpuset
cpus file to not be effected by hotplug. When a cpu is offlined,
it will be removed from the cpuset/cpus file. When a cpu is onlined,
if the cpuset originally requested that that cpu was part of the cpuset,
that cpu will be restored to the cpuset. The cpus files still
have to be hierachical, but the ranges no longer have to be out of
the currently online cpus, just the physically present cpus.

To show the problem:
 # echo '1-3' > cpuset.cpus
 # cat cpuset.cpus
 1-3
 # echo 0 > /sys/devices/system/cpu/cpu2/online
 # cat cpuset.cpus
 1,3
 # echo 1 > /sys/devices/system/cpu/cpu2/online
 # cat cpuset.cpus
 1,3

With patch, the last command outputs:
 # cat cpuset.cpus
 1-3

Cc: Dmitry Shmidt <dimitrysh@google.com>
Cc: Amit Pundir <amit.pundir@linaro.org>
Cc: kernel-team@android.com
Cc: jsbarnes@google.com
Cc: sonnyrao@google.com
Cc: vpillai@digitalocean.com
Cc: peterz@infradead.org
Cc: Guenter Roeck <groeck@chromium.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Greg Kerr <kerrnel@google.com>
(Original idea from Riley Andrews <riandrews@google.com> who has since
left Google).
(Joel: Forward ported from Android and ChromeOS trees to upstream,
adjusted slightly to handle the scheduling partitions work.)
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

---
This patch is in various kernel trees for > 3 years. Atleast 3
organizations using Linux need this patch to handle hotplug: Google's
Android and ChromeOS, DigitalOcean.

 kernel/cgroup/cpuset.c | 45 +++++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 58f5073acff7d..5eb1fb613d0a6 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -105,6 +105,7 @@ struct cpuset {
 
 	/* user-configured CPUs and Memory Nodes allow to tasks */
 	cpumask_var_t cpus_allowed;
+	cpumask_var_t cpus_requested;
 	nodemask_t mems_allowed;
 
 	/* effective CPUs and Memory Nodes allow to tasks */
@@ -443,7 +444,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
 
 static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
 {
-	return	cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
+	return	cpumask_subset(p->cpus_requested, q->cpus_requested) &&
 		nodes_subset(p->mems_allowed, q->mems_allowed) &&
 		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
 		is_mem_exclusive(p) <= is_mem_exclusive(q);
@@ -459,12 +460,13 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
  */
 static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
 {
-	cpumask_var_t *pmask1, *pmask2, *pmask3;
+	cpumask_var_t *pmask1, *pmask2, *pmask3, *pmask4;
 
 	if (cs) {
 		pmask1 = &cs->cpus_allowed;
 		pmask2 = &cs->effective_cpus;
 		pmask3 = &cs->subparts_cpus;
+		pmask4 = &cs->cpus_requested;
 	} else {
 		pmask1 = &tmp->new_cpus;
 		pmask2 = &tmp->addmask;
@@ -480,8 +482,13 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
 	if (!zalloc_cpumask_var(pmask3, GFP_KERNEL))
 		goto free_two;
 
+	if (cs && !zalloc_cpumask_var(pmask4, GFP_KERNEL))
+		goto free_three;
+
 	return 0;
 
+free_three:
+	free_cpumask_var(*pmask3);
 free_two:
 	free_cpumask_var(*pmask2);
 free_one:
@@ -498,6 +505,7 @@ static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
 {
 	if (cs) {
 		free_cpumask_var(cs->cpus_allowed);
+		free_cpumask_var(cs->cpus_requested);
 		free_cpumask_var(cs->effective_cpus);
 		free_cpumask_var(cs->subparts_cpus);
 	}
@@ -526,6 +534,7 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
 	}
 
 	cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+	cpumask_copy(trial->cpus_requested, cs->cpus_requested);
 	cpumask_copy(trial->effective_cpus, cs->effective_cpus);
 	return trial;
 }
@@ -594,7 +603,8 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
 	cpuset_for_each_child(c, css, par) {
 		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
 		    c != cur &&
-		    cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
+		    cpumask_intersects(trial->cpus_requested,
+				       c->cpus_requested))
 			goto out;
 		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
 		    c != cur &&
@@ -1056,10 +1066,11 @@ static void compute_effective_cpumask(struct cpumask *new_cpus,
 	if (parent->nr_subparts_cpus) {
 		cpumask_or(new_cpus, parent->effective_cpus,
 			   parent->subparts_cpus);
-		cpumask_and(new_cpus, new_cpus, cs->cpus_allowed);
+		cpumask_and(new_cpus, new_cpus, cs->cpus_requested);
 		cpumask_and(new_cpus, new_cpus, cpu_active_mask);
 	} else {
-		cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus);
+		cpumask_and(new_cpus, cs->cpus_requested,
+			    parent->effective_cpus);
 	}
 }
 
@@ -1482,27 +1493,29 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		return -EACCES;
 
 	/*
-	 * An empty cpus_allowed is ok only if the cpuset has no tasks.
+	 * An empty cpus_requested is ok only if the cpuset has no tasks.
 	 * Since cpulist_parse() fails on an empty mask, we special case
 	 * that parsing.  The validate_change() call ensures that cpusets
 	 * with tasks have cpus.
 	 */
 	if (!*buf) {
-		cpumask_clear(trialcs->cpus_allowed);
+		cpumask_clear(trialcs->cpus_requested);
 	} else {
-		retval = cpulist_parse(buf, trialcs->cpus_allowed);
+		retval = cpulist_parse(buf, trialcs->cpus_requested);
 		if (retval < 0)
 			return retval;
-
-		if (!cpumask_subset(trialcs->cpus_allowed,
-				    top_cpuset.cpus_allowed))
-			return -EINVAL;
 	}
 
+	if (!cpumask_subset(trialcs->cpus_requested, top_cpuset.cpus_requested))
+		return -EINVAL;
+
 	/* Nothing to do if the cpus didn't change */
-	if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
+	if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
 		return 0;
 
+	cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested,
+		    cpu_active_mask);
+
 	retval = validate_change(cs, trialcs);
 	if (retval < 0)
 		return retval;
@@ -1528,6 +1541,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 
 	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+	cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
 
 	/*
 	 * Make sure that subparts_cpus is a subset of cpus_allowed.
@@ -2409,7 +2423,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 
 	switch (type) {
 	case FILE_CPULIST:
-		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
+		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
 		break;
 	case FILE_MEMLIST:
 		seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@ -2778,6 +2792,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	cs->mems_allowed = parent->mems_allowed;
 	cs->effective_mems = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+	cpumask_copy(cs->cpus_requested, parent->cpus_requested);
 	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
 	spin_unlock_irq(&callback_lock);
 out_unlock:
@@ -2892,10 +2907,12 @@ int __init cpuset_init(void)
 	BUG_ON(percpu_init_rwsem(&cpuset_rwsem));
 
 	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL));
 	BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
 	BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
 
 	cpumask_setall(top_cpuset.cpus_allowed);
+	cpumask_setall(top_cpuset.cpus_requested);
 	nodes_setall(top_cpuset.mems_allowed);
 	cpumask_setall(top_cpuset.effective_cpus);
 	nodes_setall(top_cpuset.effective_mems);
-- 
2.25.1.696.g5e7596f4ac-goog

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 19:16 [PATCH RFC] cpuset: Make cpusets get restored on hotplug Joel Fernandes (Google)
@ 2020-03-26 19:20 ` Tejun Heo
  2020-03-26 19:44   ` Joel Fernandes
  0 siblings, 1 reply; 17+ messages in thread
From: Tejun Heo @ 2020-03-26 19:20 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-kernel, Dmitry Shmidt, Amit Pundir, kernel-team, jsbarnes,
	sonnyrao, vpillai, peterz, Guenter Roeck, Waiman Long, Greg Kerr,
	cgroups, Johannes Weiner, Li Zefan

On Thu, Mar 26, 2020 at 03:16:23PM -0400, Joel Fernandes (Google) wrote:
> This deliberately changes the behavior of the per-cpuset
> cpus file to not be effected by hotplug. When a cpu is offlined,
> it will be removed from the cpuset/cpus file. When a cpu is onlined,
> if the cpuset originally requested that that cpu was part of the cpuset,
> that cpu will be restored to the cpuset. The cpus files still
> have to be hierachical, but the ranges no longer have to be out of
> the currently online cpus, just the physically present cpus.

This is already the behavior on cgroup2 and I don't think we want to
introduce this big a behavior change to cgroup1 cpuset at this point.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 19:20 ` Tejun Heo
@ 2020-03-26 19:44   ` Joel Fernandes
  2020-03-26 19:48     ` Tejun Heo
  2020-03-26 19:57     ` Waiman Long
  0 siblings, 2 replies; 17+ messages in thread
From: Joel Fernandes @ 2020-03-26 19:44 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Dmitry Shmidt, Amit Pundir, kernel-team, jsbarnes,
	sonnyrao, vpillai, peterz, Guenter Roeck, Waiman Long, Greg Kerr,
	cgroups, Johannes Weiner, Li Zefan

Hi Tejun,

On Thu, Mar 26, 2020 at 03:20:35PM -0400, Tejun Heo wrote:
> On Thu, Mar 26, 2020 at 03:16:23PM -0400, Joel Fernandes (Google) wrote:
> > This deliberately changes the behavior of the per-cpuset
> > cpus file to not be effected by hotplug. When a cpu is offlined,
> > it will be removed from the cpuset/cpus file. When a cpu is onlined,
> > if the cpuset originally requested that that cpu was part of the cpuset,
> > that cpu will be restored to the cpuset. The cpus files still
> > have to be hierachical, but the ranges no longer have to be out of
> > the currently online cpus, just the physically present cpus.
> 
> This is already the behavior on cgroup2 and I don't think we want to
> introduce this big a behavior change to cgroup1 cpuset at this point.

It is not really that big a change. Please go over the patch, we are not
changing anything with how ->cpus_allowed works and interacts with the rest
of the system and the scheduler. We have just introduced a new mask to keep
track of which CPUs were requested without them being affected by hotplug. On
CPU onlining, we restore the state of ->cpus_allowed as not be affected by
hotplug.

There's 3 companies that have this issue so that should tell you something.
We don't want to carry this patch forever. Many people consider the hotplug
behavior to be completely broken.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 19:44   ` Joel Fernandes
@ 2020-03-26 19:48     ` Tejun Heo
  2020-03-26 19:57     ` Waiman Long
  1 sibling, 0 replies; 17+ messages in thread
From: Tejun Heo @ 2020-03-26 19:48 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Dmitry Shmidt, Amit Pundir, kernel-team, jsbarnes,
	sonnyrao, vpillai, peterz, Guenter Roeck, Waiman Long, Greg Kerr,
	cgroups, Johannes Weiner, Li Zefan

On Thu, Mar 26, 2020 at 03:44:48PM -0400, Joel Fernandes wrote:
> It is not really that big a change. Please go over the patch, we are not
> changing anything with how ->cpus_allowed works and interacts with the rest
> of the system and the scheduler. We have just introduced a new mask to keep
> track of which CPUs were requested without them being affected by hotplug. On
> CPU onlining, we restore the state of ->cpus_allowed as not be affected by
> hotplug.

It's not the code. It's the behavior. I'm not flipping the behavior for
the existing cgroup1 users underneath them at this point. As-is, it's a
hard nack. If you really really really want it, put it behind a mount
option.

-- 
tejun

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 19:44   ` Joel Fernandes
  2020-03-26 19:48     ` Tejun Heo
@ 2020-03-26 19:57     ` Waiman Long
  2020-03-26 20:05       ` Sonny Rao
  2021-10-26 23:58       ` Barry Song
  1 sibling, 2 replies; 17+ messages in thread
From: Waiman Long @ 2020-03-26 19:57 UTC (permalink / raw)
  To: Joel Fernandes, Tejun Heo
  Cc: linux-kernel, Dmitry Shmidt, Amit Pundir, kernel-team, jsbarnes,
	sonnyrao, vpillai, peterz, Guenter Roeck, Greg Kerr, cgroups,
	Johannes Weiner, Li Zefan

On 3/26/20 3:44 PM, Joel Fernandes wrote:
> Hi Tejun,
>
> On Thu, Mar 26, 2020 at 03:20:35PM -0400, Tejun Heo wrote:
>> On Thu, Mar 26, 2020 at 03:16:23PM -0400, Joel Fernandes (Google) wrote:
>>> This deliberately changes the behavior of the per-cpuset
>>> cpus file to not be effected by hotplug. When a cpu is offlined,
>>> it will be removed from the cpuset/cpus file. When a cpu is onlined,
>>> if the cpuset originally requested that that cpu was part of the cpuset,
>>> that cpu will be restored to the cpuset. The cpus files still
>>> have to be hierachical, but the ranges no longer have to be out of
>>> the currently online cpus, just the physically present cpus.
>> This is already the behavior on cgroup2 and I don't think we want to
>> introduce this big a behavior change to cgroup1 cpuset at this point.
> It is not really that big a change. Please go over the patch, we are not
> changing anything with how ->cpus_allowed works and interacts with the rest
> of the system and the scheduler. We have just introduced a new mask to keep
> track of which CPUs were requested without them being affected by hotplug. On
> CPU onlining, we restore the state of ->cpus_allowed as not be affected by
> hotplug.
>
> There's 3 companies that have this issue so that should tell you something.
> We don't want to carry this patch forever. Many people consider the hotplug
> behavior to be completely broken.
>
I think Tejun is concerned about a change in the default behavior of
cpuset v1.

There is a special v2 mode for cpuset that is enabled by the mount
option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
v2 behavior. I introduced this v2 mode a while back to address, I think,
a similar concern. Could you try that to see if it is able to address
your problem? If not, you can make some code adjustment within the
framework of the v2 mode. As long as it is an opt-in, I think we are
open to further change.

Cheers,
Longman


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 19:57     ` Waiman Long
@ 2020-03-26 20:05       ` Sonny Rao
  2020-03-26 20:18         ` Tejun Heo
  2020-03-26 21:47         ` Waiman Long
  2021-10-26 23:58       ` Barry Song
  1 sibling, 2 replies; 17+ messages in thread
From: Sonny Rao @ 2020-03-26 20:05 UTC (permalink / raw)
  To: Waiman Long
  Cc: Joel Fernandes, Tejun Heo, linux-kernel, Dmitry Shmidt,
	Amit Pundir, kernel-team, Jesse Barnes, vpillai, Peter Zijlstra,
	Guenter Roeck, Greg Kerr, cgroups, Johannes Weiner, Li Zefan

On Thu, Mar 26, 2020 at 12:57 PM Waiman Long <longman@redhat.com> wrote:
>
> On 3/26/20 3:44 PM, Joel Fernandes wrote:
> > Hi Tejun,
> >
> > On Thu, Mar 26, 2020 at 03:20:35PM -0400, Tejun Heo wrote:
> >> On Thu, Mar 26, 2020 at 03:16:23PM -0400, Joel Fernandes (Google) wrote:
> >>> This deliberately changes the behavior of the per-cpuset
> >>> cpus file to not be effected by hotplug. When a cpu is offlined,
> >>> it will be removed from the cpuset/cpus file. When a cpu is onlined,
> >>> if the cpuset originally requested that that cpu was part of the cpuset,
> >>> that cpu will be restored to the cpuset. The cpus files still
> >>> have to be hierachical, but the ranges no longer have to be out of
> >>> the currently online cpus, just the physically present cpus.
> >> This is already the behavior on cgroup2 and I don't think we want to
> >> introduce this big a behavior change to cgroup1 cpuset at this point.
> > It is not really that big a change. Please go over the patch, we are not
> > changing anything with how ->cpus_allowed works and interacts with the rest
> > of the system and the scheduler. We have just introduced a new mask to keep
> > track of which CPUs were requested without them being affected by hotplug. On
> > CPU onlining, we restore the state of ->cpus_allowed as not be affected by
> > hotplug.
> >
> > There's 3 companies that have this issue so that should tell you something.
> > We don't want to carry this patch forever. Many people consider the hotplug
> > behavior to be completely broken.
> >
> I think Tejun is concerned about a change in the default behavior of
> cpuset v1.
>
> There is a special v2 mode for cpuset that is enabled by the mount
> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> v2 behavior. I introduced this v2 mode a while back to address, I think,
> a similar concern. Could you try that to see if it is able to address
> your problem? If not, you can make some code adjustment within the
> framework of the v2 mode. As long as it is an opt-in, I think we are
> open to further change.

I am surprised if anyone actually wants this behavior, we (Chrome OS)
found out about it accidentally, and then found that Android had been
carrying a patch to fix it.  And if it were a desirable behavior then
why isn't it an option in v2?

>
> Cheers,
> Longman
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 20:05       ` Sonny Rao
@ 2020-03-26 20:18         ` Tejun Heo
  2020-03-26 20:23           ` Joel Fernandes
  2020-03-26 21:47         ` Waiman Long
  1 sibling, 1 reply; 17+ messages in thread
From: Tejun Heo @ 2020-03-26 20:18 UTC (permalink / raw)
  To: Sonny Rao
  Cc: Waiman Long, Joel Fernandes, linux-kernel, Dmitry Shmidt,
	Amit Pundir, kernel-team, Jesse Barnes, vpillai, Peter Zijlstra,
	Guenter Roeck, Greg Kerr, cgroups, Johannes Weiner, Li Zefan

On Thu, Mar 26, 2020 at 01:05:04PM -0700, Sonny Rao wrote:
> I am surprised if anyone actually wants this behavior, we (Chrome OS)

The behavior is silly but consistent in that it doesn't allow empty active
cpusets and it has been like that for many many years now.

> found out about it accidentally, and then found that Android had been
> carrying a patch to fix it.  And if it were a desirable behavior then
> why isn't it an option in v2?

Nobody is saying it's a good behavior (hence the change in cgroup2) and there
are situations where changing things like this is justifiable, but, here:

* The proposed change makes the interface inconsistent and does so
  unconditionally on what is now a mostly legacy interface.

* There already is a newer version of the interface which includes the
  desired behavior.

* I forgot but as Waiman pointed out, you can even opt-in to the new
  behavior, which is more comprehensive without the inconsitencies,
  while staying on cgroup1.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 20:18         ` Tejun Heo
@ 2020-03-26 20:23           ` Joel Fernandes
  2020-03-27  1:26             ` Waiman Long
  0 siblings, 1 reply; 17+ messages in thread
From: Joel Fernandes @ 2020-03-26 20:23 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Sonny Rao, Waiman Long, linux-kernel, Dmitry Shmidt, Amit Pundir,
	kernel-team, Jesse Barnes, vpillai, Peter Zijlstra,
	Guenter Roeck, Greg Kerr, cgroups, Johannes Weiner, Li Zefan

On Thu, Mar 26, 2020 at 04:18:59PM -0400, Tejun Heo wrote:
> On Thu, Mar 26, 2020 at 01:05:04PM -0700, Sonny Rao wrote:
> > I am surprised if anyone actually wants this behavior, we (Chrome OS)
> 
> The behavior is silly but consistent in that it doesn't allow empty active
> cpusets and it has been like that for many many years now.
> 
> > found out about it accidentally, and then found that Android had been
> > carrying a patch to fix it.  And if it were a desirable behavior then
> > why isn't it an option in v2?
> 
> Nobody is saying it's a good behavior (hence the change in cgroup2) and there
> are situations where changing things like this is justifiable, but, here:
> 
> * The proposed change makes the interface inconsistent and does so
>   unconditionally on what is now a mostly legacy interface.
> 
> * There already is a newer version of the interface which includes the
>   desired behavior.
> 
> * I forgot but as Waiman pointed out, you can even opt-in to the new
>   behavior, which is more comprehensive without the inconsitencies,
>   while staying on cgroup1.

Thank you Tejun, Waiman and Sonny. I confirmed the cgroup_v2_mode option
fixes cgroup v1's broken behavior.

We will use this mount option on our systems to fix it.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 20:05       ` Sonny Rao
  2020-03-26 20:18         ` Tejun Heo
@ 2020-03-26 21:47         ` Waiman Long
  2020-03-26 22:03           ` Sonny Rao
  1 sibling, 1 reply; 17+ messages in thread
From: Waiman Long @ 2020-03-26 21:47 UTC (permalink / raw)
  To: Sonny Rao
  Cc: Joel Fernandes, Tejun Heo, linux-kernel, Dmitry Shmidt,
	Amit Pundir, kernel-team, Jesse Barnes, vpillai, Peter Zijlstra,
	Guenter Roeck, Greg Kerr, cgroups, Johannes Weiner, Li Zefan

On 3/26/20 4:05 PM, Sonny Rao wrote:
>> I think Tejun is concerned about a change in the default behavior of
>> cpuset v1.
>>
>> There is a special v2 mode for cpuset that is enabled by the mount
>> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
>> v2 behavior. I introduced this v2 mode a while back to address, I think,
>> a similar concern. Could you try that to see if it is able to address
>> your problem? If not, you can make some code adjustment within the
>> framework of the v2 mode. As long as it is an opt-in, I think we are
>> open to further change.
> I am surprised if anyone actually wants this behavior, we (Chrome OS)
> found out about it accidentally, and then found that Android had been
> carrying a patch to fix it.  And if it were a desirable behavior then
> why isn't it an option in v2?
>
I am a bit confused. The v2 mode make cpuset v1 behaves more like cpuset
v2. The original v1 behavior has some obvious issue that was fixed in
v2. So what v2 option are you talking about?

Regards,
Longman


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 21:47         ` Waiman Long
@ 2020-03-26 22:03           ` Sonny Rao
  0 siblings, 0 replies; 17+ messages in thread
From: Sonny Rao @ 2020-03-26 22:03 UTC (permalink / raw)
  To: Waiman Long
  Cc: Joel Fernandes, Tejun Heo, linux-kernel, Dmitry Shmidt,
	Amit Pundir, kernel-team, Jesse Barnes, vpillai, Peter Zijlstra,
	Guenter Roeck, Greg Kerr, cgroups, Johannes Weiner, Li Zefan

On Thu, Mar 26, 2020 at 2:47 PM Waiman Long <longman@redhat.com> wrote:
>
> On 3/26/20 4:05 PM, Sonny Rao wrote:
> >> I think Tejun is concerned about a change in the default behavior of
> >> cpuset v1.
> >>
> >> There is a special v2 mode for cpuset that is enabled by the mount
> >> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> >> v2 behavior. I introduced this v2 mode a while back to address, I think,
> >> a similar concern. Could you try that to see if it is able to address
> >> your problem? If not, you can make some code adjustment within the
> >> framework of the v2 mode. As long as it is an opt-in, I think we are
> >> open to further change.
> > I am surprised if anyone actually wants this behavior, we (Chrome OS)
> > found out about it accidentally, and then found that Android had been
> > carrying a patch to fix it.  And if it were a desirable behavior then
> > why isn't it an option in v2?
> >
> I am a bit confused. The v2 mode make cpuset v1 behaves more like cpuset
> v2. The original v1 behavior has some obvious issue that was fixed in
> v2. So what v2 option are you talking about?

I was merely pointing out the behavior of the v1 implementation is so
undesirable that it wasn't kept at all in v2.  IMHO, it's a bug that
should be fixed, and I think it's possible to keep the old behavior if
all cpus are offlined, but since you've added this option we can use
it instead.

>
> Regards,
> Longman
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 20:23           ` Joel Fernandes
@ 2020-03-27  1:26             ` Waiman Long
  2020-03-27  3:32               ` Joel Fernandes
  0 siblings, 1 reply; 17+ messages in thread
From: Waiman Long @ 2020-03-27  1:26 UTC (permalink / raw)
  To: Joel Fernandes, Tejun Heo
  Cc: Sonny Rao, linux-kernel, Dmitry Shmidt, Amit Pundir, kernel-team,
	Jesse Barnes, vpillai, Peter Zijlstra, Guenter Roeck, Greg Kerr,
	cgroups, Johannes Weiner, Li Zefan

On 3/26/20 4:23 PM, Joel Fernandes wrote:
> On Thu, Mar 26, 2020 at 04:18:59PM -0400, Tejun Heo wrote: >> On Thu, Mar 26, 2020 at 01:05:04PM -0700, Sonny Rao wrote: >>> I am
surprised if anyone actually wants this behavior, we (Chrome >>> OS) >>
>> The behavior is silly but consistent in that it doesn't allow empty
>> active cpusets and it has been like that for many many years now. >>
>>> found out about it accidentally, and then found that Android had >>>
been carrying a patch to fix it. And if it were a desirable >>> behavior
then why isn't it an option in v2? >> >> Nobody is saying it's a good
behavior (hence the change in cgroup2) >> and there are situations where
changing things like this is >> justifiable, but, here: >> >> * The
proposed change makes the interface inconsistent and does so >>
unconditionally on what is now a mostly legacy interface. >> >> * There
already is a newer version of the interface which includes >> the
desired behavior. >> >> * I forgot but as Waiman pointed out, you can
even opt-in to the >> new behavior, which is more comprehensive without
the >> inconsitencies, while staying on cgroup1. > > Thank you Tejun,
Waiman and Sonny. I confirmed the cgroup_v2_mode > option fixes cgroup
v1's broken behavior. > > We will use this mount option on our systems
to fix it.
I am glad that it works for you.

I think the problem is that the v2_mode mount option is not that well
documented. Maybe I should send a patch to add some some description
about it in cgroup-v2.rst or cgroup-v1/cpusets.rst.

Cheers,
Longman




^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-27  1:26             ` Waiman Long
@ 2020-03-27  3:32               ` Joel Fernandes
  0 siblings, 0 replies; 17+ messages in thread
From: Joel Fernandes @ 2020-03-27  3:32 UTC (permalink / raw)
  To: Waiman Long
  Cc: Tejun Heo, Sonny Rao, linux-kernel, Dmitry Shmidt, Amit Pundir,
	kernel-team, Jesse Barnes, vpillai, Peter Zijlstra,
	Guenter Roeck, Greg Kerr, cgroups, Johannes Weiner, Li Zefan

On Thu, Mar 26, 2020 at 09:26:51PM -0400, Waiman Long wrote:
[...]
> I think the problem is that the v2_mode mount option is not that well
> documented. Maybe I should send a patch to add some some description
> about it in cgroup-v2.rst or cgroup-v1/cpusets.rst.

That is definitely worth it and I would fully support that.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2020-03-26 19:57     ` Waiman Long
  2020-03-26 20:05       ` Sonny Rao
@ 2021-10-26 23:58       ` Barry Song
  2021-10-27  1:06         ` Waiman Long
  1 sibling, 1 reply; 17+ messages in thread
From: Barry Song @ 2021-10-26 23:58 UTC (permalink / raw)
  To: longman
  Cc: amit.pundir, cgroups, dimitrysh, groeck, hannes, joel, jsbarnes,
	kernel-team, kerrnel, linux-kernel, lizefan, peterz, sonnyrao,
	tj, vpillai

> I think Tejun is concerned about a change in the default behavior of
> cpuset v1.
>
> There is a special v2 mode for cpuset that is enabled by the mount
> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> v2 behavior. I introduced this v2 mode a while back to address, I think,
> a similar concern. Could you try that to see if it is able to address
> your problem? If not, you can make some code adjustment within the
> framework of the v2 mode. As long as it is an opt-in, I think we are
> open to further change.

I am also able to reproduce on Ubuntu 21.04 LTS.

all docker will be put in this cgroups and its child cgroups:
/sys/fs/cgroup/cpuset/docker

disabling and enabling SMT by:
echo off > /sys/devices/system/cpu/smt/control
echo on > /sys/devices/system/cpu/smt/control

or unpluging and pluging CPUs by:
echo 0 > /sys/devices/system/cpu/cpuX/online
echo 1 > /sys/devices/system/cpu/cpuX/online

then all docker images will lose some CPUs.

So should we document the broken behaviours somewhere?

Thanks
barry

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2021-10-26 23:58       ` Barry Song
@ 2021-10-27  1:06         ` Waiman Long
  2021-10-27  2:21           ` Barry Song
  0 siblings, 1 reply; 17+ messages in thread
From: Waiman Long @ 2021-10-27  1:06 UTC (permalink / raw)
  To: Barry Song
  Cc: amit.pundir, cgroups, dimitrysh, groeck, hannes, joel, jsbarnes,
	kernel-team, kerrnel, linux-kernel, lizefan, peterz, sonnyrao,
	tj, vpillai


On 10/26/21 7:58 PM, Barry Song wrote:
>> I think Tejun is concerned about a change in the default behavior of
>> cpuset v1.
>>
>> There is a special v2 mode for cpuset that is enabled by the mount
>> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
>> v2 behavior. I introduced this v2 mode a while back to address, I think,
>> a similar concern. Could you try that to see if it is able to address
>> your problem? If not, you can make some code adjustment within the
>> framework of the v2 mode. As long as it is an opt-in, I think we are
>> open to further change.
> I am also able to reproduce on Ubuntu 21.04 LTS.
>
> all docker will be put in this cgroups and its child cgroups:
> /sys/fs/cgroup/cpuset/docker
>
> disabling and enabling SMT by:
> echo off > /sys/devices/system/cpu/smt/control
> echo on > /sys/devices/system/cpu/smt/control
>
> or unpluging and pluging CPUs by:
> echo 0 > /sys/devices/system/cpu/cpuX/online
> echo 1 > /sys/devices/system/cpu/cpuX/online
>
> then all docker images will lose some CPUs.
>
> So should we document the broken behaviours somewhere?

Is the special cpuset_v2_mode mount option able to fix the issue?

This mode is documented in

Documentation/admin-guide/cgroup-v1/cpuset.rst:

The cpuset.effective_cpus and cpuset.effective_mems files are
normally read-only copies of cpuset.cpus and cpuset.mems files
respectively.  If the cpuset cgroup filesystem is mounted with the
special "cpuset_v2_mode" option, the behavior of these files will become
similar to the corresponding files in cpuset v2.  In other words, hotplug
events will not change cpuset.cpus and cpuset.mems.  Those events will
only affect cpuset.effective_cpus and cpuset.effective_mems which show
the actual cpus and memory nodes that are currently used by this cpuset.
See Documentation/admin-guide/cgroup-v2.rst for more information about
cpuset v2 behavior.

Maybe we can make it more visible.

Cheers,
Longman


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2021-10-27  1:06         ` Waiman Long
@ 2021-10-27  2:21           ` Barry Song
  2021-10-27  2:35             ` Waiman Long
  0 siblings, 1 reply; 17+ messages in thread
From: Barry Song @ 2021-10-27  2:21 UTC (permalink / raw)
  To: Waiman Long
  Cc: amit.pundir, cgroups, Dmitry Shmidt, groeck, hannes, joel,
	jsbarnes, kernel-team, kerrnel, LKML, lizefan, Peter Zijlstra,
	sonnyrao, Tejun Heo, vpillai

On Wed, Oct 27, 2021 at 2:06 PM Waiman Long <longman@redhat.com> wrote:
>
>
> On 10/26/21 7:58 PM, Barry Song wrote:
> >> I think Tejun is concerned about a change in the default behavior of
> >> cpuset v1.
> >>
> >> There is a special v2 mode for cpuset that is enabled by the mount
> >> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> >> v2 behavior. I introduced this v2 mode a while back to address, I think,
> >> a similar concern. Could you try that to see if it is able to address
> >> your problem? If not, you can make some code adjustment within the
> >> framework of the v2 mode. As long as it is an opt-in, I think we are
> >> open to further change.
> > I am also able to reproduce on Ubuntu 21.04 LTS.
> >
> > all docker will be put in this cgroups and its child cgroups:
> > /sys/fs/cgroup/cpuset/docker
> >
> > disabling and enabling SMT by:
> > echo off > /sys/devices/system/cpu/smt/control
> > echo on > /sys/devices/system/cpu/smt/control
> >
> > or unpluging and pluging CPUs by:
> > echo 0 > /sys/devices/system/cpu/cpuX/online
> > echo 1 > /sys/devices/system/cpu/cpuX/online
> >
> > then all docker images will lose some CPUs.
> >
> > So should we document the broken behaviours somewhere?
>
> Is the special cpuset_v2_mode mount option able to fix the issue?
>
> This mode is documented in
>
> Documentation/admin-guide/cgroup-v1/cpuset.rst:
>
> The cpuset.effective_cpus and cpuset.effective_mems files are
> normally read-only copies of cpuset.cpus and cpuset.mems files
> respectively.  If the cpuset cgroup filesystem is mounted with the
> special "cpuset_v2_mode" option, the behavior of these files will become
> similar to the corresponding files in cpuset v2.  In other words, hotplug
> events will not change cpuset.cpus and cpuset.mems.  Those events will
> only affect cpuset.effective_cpus and cpuset.effective_mems which show
> the actual cpus and memory nodes that are currently used by this cpuset.
> See Documentation/admin-guide/cgroup-v2.rst for more information about
> cpuset v2 behavior.
>
> Maybe we can make it more visible.

Is it possible to make cpuset_v2_mode true in default? not quite sure if
it will harm something.

>
> Cheers,
> Longman

Thanks
Barry

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2021-10-27  2:21           ` Barry Song
@ 2021-10-27  2:35             ` Waiman Long
  2021-10-27  2:42               ` Barry Song
  0 siblings, 1 reply; 17+ messages in thread
From: Waiman Long @ 2021-10-27  2:35 UTC (permalink / raw)
  To: Barry Song
  Cc: amit.pundir, cgroups, Dmitry Shmidt, groeck, hannes, joel,
	jsbarnes, kernel-team, kerrnel, LKML, lizefan, Peter Zijlstra,
	sonnyrao, Tejun Heo, vpillai

On 10/26/21 10:21 PM, Barry Song wrote:
> On Wed, Oct 27, 2021 at 2:06 PM Waiman Long <longman@redhat.com> wrote:
>>
>> On 10/26/21 7:58 PM, Barry Song wrote:
>>>> I think Tejun is concerned about a change in the default behavior of
>>>> cpuset v1.
>>>>
>>>> There is a special v2 mode for cpuset that is enabled by the mount
>>>> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
>>>> v2 behavior. I introduced this v2 mode a while back to address, I think,
>>>> a similar concern. Could you try that to see if it is able to address
>>>> your problem? If not, you can make some code adjustment within the
>>>> framework of the v2 mode. As long as it is an opt-in, I think we are
>>>> open to further change.
>>> I am also able to reproduce on Ubuntu 21.04 LTS.
>>>
>>> all docker will be put in this cgroups and its child cgroups:
>>> /sys/fs/cgroup/cpuset/docker
>>>
>>> disabling and enabling SMT by:
>>> echo off > /sys/devices/system/cpu/smt/control
>>> echo on > /sys/devices/system/cpu/smt/control
>>>
>>> or unpluging and pluging CPUs by:
>>> echo 0 > /sys/devices/system/cpu/cpuX/online
>>> echo 1 > /sys/devices/system/cpu/cpuX/online
>>>
>>> then all docker images will lose some CPUs.
>>>
>>> So should we document the broken behaviours somewhere?
>> Is the special cpuset_v2_mode mount option able to fix the issue?
>>
>> This mode is documented in
>>
>> Documentation/admin-guide/cgroup-v1/cpuset.rst:
>>
>> The cpuset.effective_cpus and cpuset.effective_mems files are
>> normally read-only copies of cpuset.cpus and cpuset.mems files
>> respectively.  If the cpuset cgroup filesystem is mounted with the
>> special "cpuset_v2_mode" option, the behavior of these files will become
>> similar to the corresponding files in cpuset v2.  In other words, hotplug
>> events will not change cpuset.cpus and cpuset.mems.  Those events will
>> only affect cpuset.effective_cpus and cpuset.effective_mems which show
>> the actual cpus and memory nodes that are currently used by this cpuset.
>> See Documentation/admin-guide/cgroup-v2.rst for more information about
>> cpuset v2 behavior.
>>
>> Maybe we can make it more visible.
> Is it possible to make cpuset_v2_mode true in default? not quite sure if
> it will harm something.

The cpuset_v2_mode is a change in v1 behavior and that is why it is an 
opt-in as we don't want to break existing applications that have a 
dependency on the current v1 behavior. If users switch to use cgroup v2, 
they get the new behavior. Alternately, they can modify the system 
startup script to use the v2 behavior by using the mount option. I don't 
think we are going to change the v1 default behavior.

Cheers,
Longman


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug
  2021-10-27  2:35             ` Waiman Long
@ 2021-10-27  2:42               ` Barry Song
  0 siblings, 0 replies; 17+ messages in thread
From: Barry Song @ 2021-10-27  2:42 UTC (permalink / raw)
  To: Waiman Long
  Cc: Amit Pundir, cgroups, Dmitry Shmidt, groeck, hannes, joel,
	jsbarnes, kernel-team, Greg Kerr, LKML, lizefan, Peter Zijlstra,
	sonnyrao, Tejun Heo, vpillai

On Wed, Oct 27, 2021 at 3:35 PM Waiman Long <longman@redhat.com> wrote:
>
> On 10/26/21 10:21 PM, Barry Song wrote:
> > On Wed, Oct 27, 2021 at 2:06 PM Waiman Long <longman@redhat.com> wrote:
> >>
> >> On 10/26/21 7:58 PM, Barry Song wrote:
> >>>> I think Tejun is concerned about a change in the default behavior of
> >>>> cpuset v1.
> >>>>
> >>>> There is a special v2 mode for cpuset that is enabled by the mount
> >>>> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> >>>> v2 behavior. I introduced this v2 mode a while back to address, I think,
> >>>> a similar concern. Could you try that to see if it is able to address
> >>>> your problem? If not, you can make some code adjustment within the
> >>>> framework of the v2 mode. As long as it is an opt-in, I think we are
> >>>> open to further change.
> >>> I am also able to reproduce on Ubuntu 21.04 LTS.
> >>>
> >>> all docker will be put in this cgroups and its child cgroups:
> >>> /sys/fs/cgroup/cpuset/docker
> >>>
> >>> disabling and enabling SMT by:
> >>> echo off > /sys/devices/system/cpu/smt/control
> >>> echo on > /sys/devices/system/cpu/smt/control
> >>>
> >>> or unpluging and pluging CPUs by:
> >>> echo 0 > /sys/devices/system/cpu/cpuX/online
> >>> echo 1 > /sys/devices/system/cpu/cpuX/online
> >>>
> >>> then all docker images will lose some CPUs.
> >>>
> >>> So should we document the broken behaviours somewhere?
> >> Is the special cpuset_v2_mode mount option able to fix the issue?
> >>
> >> This mode is documented in
> >>
> >> Documentation/admin-guide/cgroup-v1/cpuset.rst:
> >>
> >> The cpuset.effective_cpus and cpuset.effective_mems files are
> >> normally read-only copies of cpuset.cpus and cpuset.mems files
> >> respectively.  If the cpuset cgroup filesystem is mounted with the
> >> special "cpuset_v2_mode" option, the behavior of these files will become
> >> similar to the corresponding files in cpuset v2.  In other words, hotplug
> >> events will not change cpuset.cpus and cpuset.mems.  Those events will
> >> only affect cpuset.effective_cpus and cpuset.effective_mems which show
> >> the actual cpus and memory nodes that are currently used by this cpuset.
> >> See Documentation/admin-guide/cgroup-v2.rst for more information about
> >> cpuset v2 behavior.
> >>
> >> Maybe we can make it more visible.
> > Is it possible to make cpuset_v2_mode true in default? not quite sure if
> > it will harm something.
>
> The cpuset_v2_mode is a change in v1 behavior and that is why it is an
> opt-in as we don't want to break existing applications that have a
> dependency on the current v1 behavior. If users switch to use cgroup v2,
> they get the new behavior. Alternately, they can modify the system
> startup script to use the v2 behavior by using the mount option. I don't
> think we are going to change the v1 default behavior.

ok, understood.

the old behaviour seems quite odd and linux distributions don't realize it.
for example, ubuntu is simply losing cpu for docker.

>
> Cheers,
> Longman

Thanks
barry

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2021-10-27  2:42 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-26 19:16 [PATCH RFC] cpuset: Make cpusets get restored on hotplug Joel Fernandes (Google)
2020-03-26 19:20 ` Tejun Heo
2020-03-26 19:44   ` Joel Fernandes
2020-03-26 19:48     ` Tejun Heo
2020-03-26 19:57     ` Waiman Long
2020-03-26 20:05       ` Sonny Rao
2020-03-26 20:18         ` Tejun Heo
2020-03-26 20:23           ` Joel Fernandes
2020-03-27  1:26             ` Waiman Long
2020-03-27  3:32               ` Joel Fernandes
2020-03-26 21:47         ` Waiman Long
2020-03-26 22:03           ` Sonny Rao
2021-10-26 23:58       ` Barry Song
2021-10-27  1:06         ` Waiman Long
2021-10-27  2:21           ` Barry Song
2021-10-27  2:35             ` Waiman Long
2021-10-27  2:42               ` Barry Song

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).