All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us
@ 2015-02-05  8:33 Zefan Li
  2015-02-05 14:25 ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Zefan Li @ 2015-02-05  8:33 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, Mike Galbraith, LKML, Stefan Bader

This is how to reproduce the bug:

int main() {
    struct sched_param param = {.sched_priority=1};

    if (fork() > 0)
        exit(0);

    setsid();

    if (sched_setscheduler(0, SCHED_RR, &param) < 0){
        perror("failed to sched_setscheduler()");
        return -1;
    }

    while(1)
        ;
}

  # ./test
  # mount -t cgroup -o cpu xxx /cgroup
  # cat /cgroup/cpu.rt_runtime_us
  950000
  # echo 940000 > /cgroup/cpu.rt_runtime_us
  Device or Resource busy

An autogroup has been created and there's an RT task in it. RT tasks in
autogroups are redirected to the root group and task_group() should
return &root_task_group, but it's broken and returns the autogroup.

We should reset p->sched_task_group when changing a normal task to an
RT task which is in autogroup.

Fixes: 8323f26ce342 ("sched: Fix race in task_group()")
Cc: <stable@vger.kernel.org> # 3.6+
Reported-by: Zhang Wei <zhangwei555@huawei.com>
Signed-off-by: Zefan Li <lizefan@huawei.com>
---
 kernel/sched/core.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 89e7283..fccde96 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3414,6 +3414,7 @@ static int __sched_setscheduler(struct task_struct *p,
 	const struct sched_class *prev_class;
 	struct rq *rq;
 	int reset_on_fork;
+	bool reset_task_group = false;
 
 	/* may grab non-irq protected spin_locks */
 	BUG_ON(in_interrupt());
@@ -3546,10 +3547,13 @@ change:
 		 * assigned.
 		 */
 		if (rt_bandwidth_enabled() && rt_policy(policy) &&
-				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
-				!task_group_is_autogroup(task_group(p))) {
-			task_rq_unlock(rq, p, &flags);
-			return -EPERM;
+		    task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			if (!task_group_is_autogroup(task_group(p))) {
+				task_rq_unlock(rq, p, &flags);
+				return -EPERM;
+			} else {
+				reset_task_group = true;
+			}
 		}
 #endif
 #ifdef CONFIG_SMP
@@ -3615,6 +3619,9 @@ change:
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, attr);
 
+	if (reset_task_group)
+		p->sched_task_group = &root_task_group;
+
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (queued) {
-- 
1.8.0.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us
  2015-02-05  8:33 [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us Zefan Li
@ 2015-02-05 14:25 ` Peter Zijlstra
  2015-02-06  1:30   ` Zefan Li
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2015-02-05 14:25 UTC (permalink / raw)
  To: Zefan Li; +Cc: Ingo Molnar, Mike Galbraith, LKML, Stefan Bader

On Thu, Feb 05, 2015 at 04:33:24PM +0800, Zefan Li wrote:
> This is how to reproduce the bug:
> 
> int main() {
>     struct sched_param param = {.sched_priority=1};
> 
>     if (fork() > 0)
>         exit(0);
> 
>     setsid();
> 
>     if (sched_setscheduler(0, SCHED_RR, &param) < 0){
>         perror("failed to sched_setscheduler()");
>         return -1;
>     }
> 
>     while(1)
>         ;
> }
> 
>   # ./test
>   # mount -t cgroup -o cpu xxx /cgroup
>   # cat /cgroup/cpu.rt_runtime_us
>   950000
>   # echo 940000 > /cgroup/cpu.rt_runtime_us
>   Device or Resource busy

That's -EBUSY, but you're changing an -EPERM condition. Neither your
patch nor explanation of the matter make sense.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us
  2015-02-05 14:25 ` Peter Zijlstra
@ 2015-02-06  1:30   ` Zefan Li
  2015-02-06 10:58     ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Zefan Li @ 2015-02-06  1:30 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Mike Galbraith, LKML, Stefan Bader

On 2015/2/5 22:25, Peter Zijlstra wrote:
> On Thu, Feb 05, 2015 at 04:33:24PM +0800, Zefan Li wrote:
>> This is how to reproduce the bug:
>>
>> int main() {
>>     struct sched_param param = {.sched_priority=1};
>>
>>     if (fork() > 0)
>>         exit(0);
>>
>>     setsid();
>>
>>     if (sched_setscheduler(0, SCHED_RR, &param) < 0){
>>         perror("failed to sched_setscheduler()");
>>         return -1;
>>     }
>>

btw, if you move setscheduler() abvoe fork(), changing rt_runtime
will succeed.

>>     while(1)
>>         ;
>> }
>>
>>   # ./test
>>   # mount -t cgroup -o cpu xxx /cgroup
>>   # cat /cgroup/cpu.rt_runtime_us
>>   950000
>>   # echo 940000 > /cgroup/cpu.rt_runtime_us
>>   Device or Resource busy
> 
> That's -EBUSY, but you're changing an -EPERM condition. Neither your
> patch nor explanation of the matter make sense.
> 

I'm not changing the -EPERM condition. I'm adding an else condition which
resets p->sched_task_group.

After running the test program, we have:

  root
   |
  autogroup   <-- the RT test program is now here

root.rt_runtime = 950000
autogroup.rt_runtime = 0

Now if you try to change the rt_runtime of the root cgroup, it returns
-EBUSY.

That comes from:

static int tg_rt_schedulable(struct task_group *tg, void *data)
(
	...
        if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
                return -EBUSY;
	...
}

static inline int tg_has_rt_tasks(struct task_group *tg)
{
        struct task_struct *g, *p;

        for_each_process_thread(g, p) {
                if (rt_task(p) && task_group(p) == tg)
                        return 1;
        }

        return 0;
}

here tg == autogroup, p == the test program, and task_group(p)
returns autogroup, which is wrong.

See this commit:

commit f44937718ce3b8360f72f6c68c9481712517a867
Author: Mike Galbraith <efault@gmx.de>
Date:   Thu Jan 13 04:54:50 2011 +0100

    sched, autogroup: Fix CONFIG_RT_GROUP_SCHED sched_setscheduler() failure
...
+       /*
+        * Autogroup RT tasks are redirected to the root task group
	...
+        * the policy change to proceed.  Thereafter, task_group()
+        * returns &root_task_group, ...
+        */



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us
  2015-02-06  1:30   ` Zefan Li
@ 2015-02-06 10:58     ` Peter Zijlstra
  2015-02-07  7:02       ` Zefan Li
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2015-02-06 10:58 UTC (permalink / raw)
  To: Zefan Li; +Cc: Ingo Molnar, Mike Galbraith, LKML, Stefan Bader

On Fri, Feb 06, 2015 at 09:30:51AM +0800, Zefan Li wrote:
> After running the test program, we have:
> 
>   root
>    |
>   autogroup   <-- the RT test program is now here
> 
> root.rt_runtime = 950000
> autogroup.rt_runtime = 0
> 
> Now if you try to change the rt_runtime of the root cgroup, it returns
> -EBUSY.
> 
> That comes from:
> 
> static int tg_rt_schedulable(struct task_group *tg, void *data)
> (
> 	...
>         if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
>                 return -EBUSY;
> 	...
> }
> 
> static inline int tg_has_rt_tasks(struct task_group *tg)
> {
>         struct task_struct *g, *p;
> 
>         for_each_process_thread(g, p) {
>                 if (rt_task(p) && task_group(p) == tg)
>                         return 1;
>         }
> 
>         return 0;
> }
> 
> here tg == autogroup, p == the test program, and task_group(p)
> returns autogroup, which is wrong.

No its not.

> See this commit:
> 
> commit f44937718ce3b8360f72f6c68c9481712517a867
> Author: Mike Galbraith <efault@gmx.de>
> Date:   Thu Jan 13 04:54:50 2011 +0100
> 
>     sched, autogroup: Fix CONFIG_RT_GROUP_SCHED sched_setscheduler() failure
> ...
> +       /*
> +        * Autogroup RT tasks are redirected to the root task group
> 	...
> +        * the policy change to proceed.  Thereafter, task_group()
> +        * returns &root_task_group, ...
> +        */

That comment is misleading; if you look at the actual code what we do is
redirect RT programs to _run_ in the root_task_group, but their
task_group() should still be autogroup.

Otherwise people could escape their cgroup by switching to and from a RT
class.

So what I think you want is something like the below; preferably with a
comment on ;-)

---
 kernel/sched/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1f37fe7f77a4..f4fd048ce7cf 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7644,6 +7644,9 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 {
 	struct task_struct *g, *p;
 
+	if (task_group_is_autogroup(tg))
+		return 0;
+
 	for_each_process_thread(g, p) {
 		if (rt_task(p) && task_group(p) == tg)
 			return 1;

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us
  2015-02-06 10:58     ` Peter Zijlstra
@ 2015-02-07  7:02       ` Zefan Li
  2015-02-09 11:22         ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Zefan Li @ 2015-02-07  7:02 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Mike Galbraith, LKML, Stefan Bader

>> See this commit:
>>
>> commit f44937718ce3b8360f72f6c68c9481712517a867
>> Author: Mike Galbraith <efault@gmx.de>
>> Date:   Thu Jan 13 04:54:50 2011 +0100
>>
>>     sched, autogroup: Fix CONFIG_RT_GROUP_SCHED sched_setscheduler() failure
>> ...
>> +       /*
>> +        * Autogroup RT tasks are redirected to the root task group
>> 	...
>> +        * the policy change to proceed.  Thereafter, task_group()
>> +        * returns &root_task_group, ...
>> +        */
> 
> That comment is misleading; if you look at the actual code what we do is
> redirect RT programs to _run_ in the root_task_group, but their
> task_group() should still be autogroup.
> 
> Otherwise people could escape their cgroup by switching to and from a RT
> class.
> 

Before 8323f26ce342 "sched: Fix race in task_group()", task_group() of
those RT tasks always return root_task_group, but the escape can't happen.

After commit 8323f26ce342, task_group always return root_task_group except
for the case I showed:

1. Change scheduling policy before setsid():

 # cat /proc/sched_debug | grep test
 R           test  4194     24851.893077       945   120     24851.893077     11196.482331         0.000000 /

2. Change policy after setsid():

 R           test  4142      4962.517723       420   120      4962.517723      4974.126149         0.000000 /autogroup-44

I think we can fix it with:

diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 8a2e230..8c3a169 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -115,9 +115,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	if (tg != &root_task_group)
 		return false;
 
-	if (p->sched_class != &fair_sched_class)
-		return false;
-
 	/*
 	 * We can only assume the task group can't go away on us if
 	 * autogroup_move_group() can see us on ->thread_group list.

> So what I think you want is something like the below; preferably with a
> comment on ;-)
> 

This is exactly what I did at first, but besides the issue described above,
seems it might lead to starving RT tasks.

If there's some rt task in autogroups but none in root cgroup, it's allowed
to set rt_runtime to 0, so I think we have to disallow this setting, like what
we already do with global rt_runtime.

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 89e7283..3f6c3ad 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7460,6 +7460,9 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 {
 	struct task_struct *g, *p;
 
+	if (task_group_is_autogroup(tg))
+		return 0;
+
 	for_each_process_thread(g, p) {
 		if (rt_task(p) && task_group(p) == tg)
 			return 1;
@@ -7540,6 +7543,9 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 		.rt_runtime = runtime,
 	};
 
+	if (tg == &root_task_group && runtime == 0)
+		return -EINVAL;
+
 	rcu_read_lock();
 	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
 	rcu_read_unlock();

> ---
>  kernel/sched/core.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 1f37fe7f77a4..f4fd048ce7cf 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7644,6 +7644,9 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
>  {
>  	struct task_struct *g, *p;
>  
> +	if (task_group_is_autogroup(tg))
> +		return 0;
> +
>  	for_each_process_thread(g, p) {
>  		if (rt_task(p) && task_group(p) == tg)
>  			return 1;
> .
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us
  2015-02-07  7:02       ` Zefan Li
@ 2015-02-09 11:22         ` Peter Zijlstra
  2015-02-09 11:27           ` Peter Zijlstra
                             ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Peter Zijlstra @ 2015-02-09 11:22 UTC (permalink / raw)
  To: Zefan Li; +Cc: Ingo Molnar, Mike Galbraith, LKML, Stefan Bader

On Sat, Feb 07, 2015 at 03:02:11PM +0800, Zefan Li wrote:
> Before 8323f26ce342 "sched: Fix race in task_group()", task_group() of
> those RT tasks always return root_task_group, but the escape can't happen.

Ah, yes, I'm an idiot. I'm not sure what I was thinking, but I seemed
to have confused myself very well indeed.

> After commit 8323f26ce342, task_group always return root_task_group except
> for the case I showed:
> 
> 1. Change scheduling policy before setsid():
> 
>  # cat /proc/sched_debug | grep test
>  R           test  4194     24851.893077       945   120     24851.893077     11196.482331         0.000000 /
> 
> 2. Change policy after setsid():
> 
>  R           test  4142      4962.517723       420   120      4962.517723      4974.126149         0.000000 /autogroup-44

Yes, which of course is inconsistent as well, it really is in the
autogroup, regardless of its class.

> I think we can fix it with:
> 
> diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
> index 8a2e230..8c3a169 100644
> --- a/kernel/sched/auto_group.c
> +++ b/kernel/sched/auto_group.c
> @@ -115,9 +115,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
>  	if (tg != &root_task_group)
>  		return false;
>  
> -	if (p->sched_class != &fair_sched_class)
> -		return false;
> -

Yes.

> This is exactly what I did at first, but besides the issue described above,
> seems it might lead to starving RT tasks.
> 
> If there's some rt task in autogroups but none in root cgroup, it's allowed
> to set rt_runtime to 0, so I think we have to disallow this setting, like what
> we already do with global rt_runtime.

> @@ -7540,6 +7543,9 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
>  		.rt_runtime = runtime,
>  	};
>  
> +	if (tg == &root_task_group && runtime == 0)
> +		return -EINVAL;
> +

Indeed, setting runtime=0 for the root group is a very bad thing
regardless of this patch. It would disallow the kernel from creating RT
threads, which it needs for 'correct' operation in a number of cases.

But lets make that a separate patch.

So how about this?

---
Subject: sched, autogroup: Fix failure to set cpu.rt_runtime_us
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon Feb  9 11:53:18 CET 2015

Because task_group() uses a cache of autogroup_task_group(), whoes
output depends on sched_class, switching classes can generate
problems.

In particular, when started as fair, the cache points to the
autogroup, so when switching to RT the tg_rt_schedulable() test fails
for every cpu.rt_{runtime,period}_us change because now the autogroup
has tasks and no runtime.

Furthermore, going back to the previous semantics of varying
task_group() with sched_class has the down-side that the sched_debug
output varies as well, even though the task really is in the
autogroup.

Therefore add an autogroup exception to tg_has_rt_tasks() -- such that
both (all) task_group() usages in sched/core now have one. And remove
all the remnants of the variable task_group() output.

Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Stefan Bader <stefan.bader@canonical.com>
Reported-by: Zefan Li <lizefan@huawei.com>
Fixes: 8323f26ce342 ("sched: Fix race in task_group()")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/auto_group.c |    6 +-----
 kernel/sched/core.c       |    6 ++++++
 2 files changed, 7 insertions(+), 5 deletions(-)

--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -87,8 +87,7 @@ static inline struct autogroup *autogrou
 	 * so we don't have to move tasks around upon policy change,
 	 * or flail around trying to allocate bandwidth on the fly.
 	 * A bandwidth exception in __sched_setscheduler() allows
-	 * the policy change to proceed.  Thereafter, task_group()
-	 * returns &root_task_group, so zero bandwidth is required.
+	 * the policy change to proceed.
 	 */
 	free_rt_sched_group(tg);
 	tg->rt_se = root_task_group.rt_se;
@@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_st
 	if (tg != &root_task_group)
 		return false;
 
-	if (p->sched_class != &fair_sched_class)
-		return false;
-
 	/*
 	 * We can only assume the task group can't go away on us if
 	 * autogroup_move_group() can see us on ->thread_group list.
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7644,6 +7644,12 @@ static inline int tg_has_rt_tasks(struct
 {
 	struct task_struct *g, *p;
 
+	/*
+	 * Autogroups do not have RT tasks; see autogroup_create().
+	 */
+	if (task_group_is_autogroup(tg))
+		return 0;
+
 	for_each_process_thread(g, p) {
 		if (rt_task(p) && task_group(p) == tg)
 			return 1;


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us
  2015-02-09 11:22         ` Peter Zijlstra
@ 2015-02-09 11:27           ` Peter Zijlstra
  2015-02-18 17:09             ` [tip:sched/core] sched/rt: Avoid obvious configuration fail tip-bot for Peter Zijlstra
  2015-02-10  1:26           ` [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us Zefan Li
  2015-02-18 17:09           ` [tip:sched/core] sched/autogroup: Fix failure to set cpu.rt_runtime_us tip-bot for Peter Zijlstra
  2 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2015-02-09 11:27 UTC (permalink / raw)
  To: Zefan Li; +Cc: Ingo Molnar, Mike Galbraith, LKML, Stefan Bader

On Mon, Feb 09, 2015 at 12:22:37PM +0100, Peter Zijlstra wrote:
> Indeed, setting runtime=0 for the root group is a very bad thing
> regardless of this patch. It would disallow the kernel from creating RT
> threads, which it needs for 'correct' operation in a number of cases.
> 
> But lets make that a separate patch.

---
Subject: sched,rt: Avoid obvious configuration fail
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon Feb  9 12:23:20 CET 2015

Setting the root group's cpu.rt_runtime_us to 0 is a bad thing; it
would disallow the kernel creating RT tasks.

One can of course still set it to 1, which will (likely) still wreck
your kernel, but at least make it clear that setting it to 0 is not
good.

Collect both sanity checks into the one place while we're there.

Suggested-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/core.c |   14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7742,6 +7742,17 @@ static int tg_set_rt_bandwidth(struct ta
 {
 	int i, err = 0;
 
+	/*
+	 * Disallowing the root group RT runtime is BAD, it would disallow the
+	 * kernel creating (and or operating) RT threads.
+	 */
+	if (tg == &root_task_group && rt_runtime == 0)
+		return -EINVAL;
+
+	/* No period doesn't make any sense. */
+	if (rt_period == 0)
+		return -EINVAL;
+
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
 	err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7798,9 +7809,6 @@ static int sched_group_set_rt_period(str
 	rt_period = (u64)rt_period_us * NSEC_PER_USEC;
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
-	if (rt_period == 0)
-		return -EINVAL;
-
 	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us
  2015-02-09 11:22         ` Peter Zijlstra
  2015-02-09 11:27           ` Peter Zijlstra
@ 2015-02-10  1:26           ` Zefan Li
  2015-02-18 17:09           ` [tip:sched/core] sched/autogroup: Fix failure to set cpu.rt_runtime_us tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 10+ messages in thread
From: Zefan Li @ 2015-02-10  1:26 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Mike Galbraith, LKML, Stefan Bader

> Subject: sched, autogroup: Fix failure to set cpu.rt_runtime_us
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Mon Feb  9 11:53:18 CET 2015
> 
> Because task_group() uses a cache of autogroup_task_group(), whoes
> output depends on sched_class, switching classes can generate
> problems.
> 
> In particular, when started as fair, the cache points to the
> autogroup, so when switching to RT the tg_rt_schedulable() test fails
> for every cpu.rt_{runtime,period}_us change because now the autogroup
> has tasks and no runtime.
> 
> Furthermore, going back to the previous semantics of varying
> task_group() with sched_class has the down-side that the sched_debug
> output varies as well, even though the task really is in the
> autogroup.
> 
> Therefore add an autogroup exception to tg_has_rt_tasks() -- such that
> both (all) task_group() usages in sched/core now have one. And remove
> all the remnants of the variable task_group() output.
> 
> Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
> Cc: Stefan Bader <stefan.bader@canonical.com>
> Reported-by: Zefan Li <lizefan@huawei.com>
> Fixes: 8323f26ce342 ("sched: Fix race in task_group()")
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Both patches look good to me.

Thanks!


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [tip:sched/core] sched/autogroup: Fix failure to set cpu.rt_runtime_us
  2015-02-09 11:22         ` Peter Zijlstra
  2015-02-09 11:27           ` Peter Zijlstra
  2015-02-10  1:26           ` [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us Zefan Li
@ 2015-02-18 17:09           ` tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 10+ messages in thread
From: tip-bot for Peter Zijlstra @ 2015-02-18 17:09 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: stefan.bader, mingo, peterz, lizefan, hpa, linux-kernel, tglx,
	umgwanakikbuti, torvalds

Commit-ID:  1fe89e1b6d270aa0d3452c60d38461ea589594e3
Gitweb:     http://git.kernel.org/tip/1fe89e1b6d270aa0d3452c60d38461ea589594e3
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Mon, 9 Feb 2015 11:53:18 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 18 Feb 2015 16:17:20 +0100

sched/autogroup: Fix failure to set cpu.rt_runtime_us

Because task_group() uses a cache of autogroup_task_group(), whose
output depends on sched_class, switching classes can generate
problems.

In particular, when started as fair, the cache points to the
autogroup, so when switching to RT the tg_rt_schedulable() test fails
for every cpu.rt_{runtime,period}_us change because now the autogroup
has tasks and no runtime.

Furthermore, going back to the previous semantics of varying
task_group() with sched_class has the down-side that the sched_debug
output varies as well, even though the task really is in the
autogroup.

Therefore add an autogroup exception to tg_has_rt_tasks() -- such that
both (all) task_group() usages in sched/core now have one. And remove
all the remnants of the variable task_group() output.

Reported-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Stefan Bader <stefan.bader@canonical.com>
Fixes: 8323f26ce342 ("sched: Fix race in task_group()")
Link: http://lkml.kernel.org/r/20150209112237.GR5029@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/auto_group.c | 6 +-----
 kernel/sched/core.c       | 6 ++++++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 8a2e230..eae160d 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void)
 	 * so we don't have to move tasks around upon policy change,
 	 * or flail around trying to allocate bandwidth on the fly.
 	 * A bandwidth exception in __sched_setscheduler() allows
-	 * the policy change to proceed.  Thereafter, task_group()
-	 * returns &root_task_group, so zero bandwidth is required.
+	 * the policy change to proceed.
 	 */
 	free_rt_sched_group(tg);
 	tg->rt_se = root_task_group.rt_se;
@@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	if (tg != &root_task_group)
 		return false;
 
-	if (p->sched_class != &fair_sched_class)
-		return false;
-
 	/*
 	 * We can only assume the task group can't go away on us if
 	 * autogroup_move_group() can see us on ->thread_group list.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index daaea92..03a67f0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7577,6 +7577,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 {
 	struct task_struct *g, *p;
 
+	/*
+	 * Autogroups do not have RT tasks; see autogroup_create().
+	 */
+	if (task_group_is_autogroup(tg))
+		return 0;
+
 	for_each_process_thread(g, p) {
 		if (rt_task(p) && task_group(p) == tg)
 			return 1;

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [tip:sched/core] sched/rt: Avoid obvious configuration fail
  2015-02-09 11:27           ` Peter Zijlstra
@ 2015-02-18 17:09             ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 10+ messages in thread
From: tip-bot for Peter Zijlstra @ 2015-02-18 17:09 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, lizefan, hpa, torvalds, tglx, linux-kernel, peterz

Commit-ID:  2636ed5f8d15ff9395731593537b4b3fdf2af24d
Gitweb:     http://git.kernel.org/tip/2636ed5f8d15ff9395731593537b4b3fdf2af24d
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Mon, 9 Feb 2015 12:23:20 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 18 Feb 2015 16:17:23 +0100

sched/rt: Avoid obvious configuration fail

Setting the root group's cpu.rt_runtime_us to 0 is a bad thing; it
would disallow the kernel creating RT tasks.

One can of course still set it to 1, which will (likely) still wreck
your kernel, but at least make it clear that setting it to 0 is not
good.

Collect both sanity checks into the one place while we're there.

Suggested-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20150209112715.GO24151@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 03a67f0..a4869bd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7675,6 +7675,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
 {
 	int i, err = 0;
 
+	/*
+	 * Disallowing the root group RT runtime is BAD, it would disallow the
+	 * kernel creating (and or operating) RT threads.
+	 */
+	if (tg == &root_task_group && rt_runtime == 0)
+		return -EINVAL;
+
+	/* No period doesn't make any sense. */
+	if (rt_period == 0)
+		return -EINVAL;
+
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
 	err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7731,9 +7742,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 	rt_period = (u64)rt_period_us * NSEC_PER_USEC;
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
-	if (rt_period == 0)
-		return -EINVAL;
-
 	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-02-18 17:11 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-05  8:33 [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us Zefan Li
2015-02-05 14:25 ` Peter Zijlstra
2015-02-06  1:30   ` Zefan Li
2015-02-06 10:58     ` Peter Zijlstra
2015-02-07  7:02       ` Zefan Li
2015-02-09 11:22         ` Peter Zijlstra
2015-02-09 11:27           ` Peter Zijlstra
2015-02-18 17:09             ` [tip:sched/core] sched/rt: Avoid obvious configuration fail tip-bot for Peter Zijlstra
2015-02-10  1:26           ` [PATCH] sched, autogroup: Fix failure when writing to cpu.rt_runtime_us Zefan Li
2015-02-18 17:09           ` [tip:sched/core] sched/autogroup: Fix failure to set cpu.rt_runtime_us tip-bot for Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.