All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
@ 2014-09-30 21:04 Kirill Tkhai
  2014-09-30 21:04 ` [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl() Kirill Tkhai
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Kirill Tkhai @ 2014-09-30 21:04 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Kirill Tkhai, Ingo Molnar, Juri Lelli

From: Kirill Tkhai <ktkhai@parallels.com>

hrtimer_try_to_cancel() may bring a surprise: its call may fail.

raw_spin_lock(&rq->lock)
...                            dl_task_timer                 raw_spin_lock(&rq->lock)
...                               raw_spin_lock(&rq->lock)   ...
   switched_from_dl()             ...                        ...
      hrtimer_try_to_cancel()     ...                        ...
   switched_to_fair()             ...                        ...
...                               ...                        ...
...                               ...                        ...
raw_spin_unlock(&rq->lock)        ...                        (acquired)
...                               ...                        ...
...                               ...                        ...
do_exit()                         ...                        ...
   schedule()                     ...                        ...
      raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
      ...                         ...                        ...
      raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
      ...                         ...                        (acquired)
      put_task_struct()           ...                        ...
          free_task_struct()      ...                        ...
      ...                         ...                        raw_spin_unlock(&rq->lock)
...                               (acquired)                 ...
...                               ...                        ...
...                               Surprise!!!                ...

So, let's implement 100% guaranteed way to cancel the timer and let's
be sure we are safe even in very unlikely situations.

We do not create any problem with rq unlocking, because it already
may happen below in pull_dl_task(). No problem with deadline tasks
balancing too.

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
---
 kernel/sched/deadline.c |   33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index abfaf3d..63f8b4a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	if (hrtimer_active(timer)) {
-		hrtimer_try_to_cancel(timer);
-		return;
-	}
-
 	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	timer->function = dl_task_timer;
 }
@@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
 
 #endif /* CONFIG_SMP */
 
+/*
+ *  Surely cancel task's dl_timer. May drop rq->lock.
+ */
+static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
+{
+	struct hrtimer *dl_timer = &p->dl.dl_timer;
+
+	/* Nobody will change task's class if pi_lock is held */
+	lockdep_assert_held(&p->pi_lock);
+
+	if (hrtimer_active(dl_timer)) {
+		int ret = hrtimer_try_to_cancel(dl_timer);
+
+		if (unlikely(ret == -1)) {
+			/*
+			 * Note, p may migrate OR new deadline tasks
+			 * may appear in rq when we are unlocking it.
+			 */
+			raw_spin_unlock(&rq->lock);
+			hrtimer_cancel(dl_timer);
+			raw_spin_lock(&rq->lock);
+		}
+	}
+}
+
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
-	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
-		hrtimer_try_to_cancel(&p->dl.dl_timer);
+	cancel_dl_timer(rq, p);
 
 	__dl_clear_params(p);
 


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl()
  2014-09-30 21:04 [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl() Kirill Tkhai
@ 2014-09-30 21:04 ` Kirill Tkhai
  2014-10-02  9:36   ` Peter Zijlstra
  2014-09-30 21:04 ` [PATCH v2 3/3] sched/fair: Delete resched_cpu() from idle_balance() Kirill Tkhai
  2014-10-02  9:34 ` [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl() Peter Zijlstra
  2 siblings, 1 reply; 19+ messages in thread
From: Kirill Tkhai @ 2014-09-30 21:04 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Kirill Tkhai, Ingo Molnar, Juri Lelli

From: Kirill Tkhai <ktkhai@parallels.com>

rq->curr task can't be in "dequeued" state in prio_changed_dl().
(The only place we can have that is __schedule()). So, we delete
rq->curr check.

Also delete "else" branch which is dead code (switched_to_dl()
is not interested in dequeued tasks and we are not interested
in balancing in this case).

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
---
 kernel/sched/deadline.c |   50 +++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 63f8b4a..38b4f19 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1638,35 +1638,33 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 			    int oldprio)
 {
-	if (task_on_rq_queued(p) || rq->curr == p) {
+	if (!task_on_rq_queued(p))
+		return;
 #ifdef CONFIG_SMP
-		/*
-		 * This might be too much, but unfortunately
-		 * we don't have the old deadline value, and
-		 * we can't argue if the task is increasing
-		 * or lowering its prio, so...
-		 */
-		if (!rq->dl.overloaded)
-			pull_dl_task(rq);
-
-		/*
-		 * If we now have a earlier deadline task than p,
-		 * then reschedule, provided p is still on this
-		 * runqueue.
-		 */
-		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
-		    rq->curr == p)
-			resched_curr(rq);
-#else
-		/*
-		 * Again, we don't know if p has a earlier
-		 * or later deadline, so let's blindly set a
-		 * (maybe not needed) rescheduling point.
-		 */
+	/*
+	 * This might be too much, but unfortunately
+	 * we don't have the old deadline value, and
+	 * we can't argue if the task is increasing
+	 * or lowering its prio, so...
+	 */
+	if (!rq->dl.overloaded)
+		pull_dl_task(rq);
+	/*
+	 * If we now have a earlier deadline task than p,
+	 * then reschedule, provided p is still on this
+	 * runqueue.
+	 */
+	if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
+	    rq->curr == p)
 		resched_curr(rq);
+#else
+	/*
+	 * Again, we don't know if p has a earlier
+	 * or later deadline, so let's blindly set a
+	 * (maybe not needed) rescheduling point.
+	 */
+	resched_curr(rq);
 #endif /* CONFIG_SMP */
-	} else
-		switched_to_dl(rq, p);
 }
 
 const struct sched_class dl_sched_class = {


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v2 3/3] sched/fair: Delete resched_cpu() from idle_balance()
  2014-09-30 21:04 [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl() Kirill Tkhai
  2014-09-30 21:04 ` [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl() Kirill Tkhai
@ 2014-09-30 21:04 ` Kirill Tkhai
  2014-10-03  5:28   ` [tip:sched/core] " tip-bot for Kirill Tkhai
  2014-10-02  9:34 ` [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl() Peter Zijlstra
  2 siblings, 1 reply; 19+ messages in thread
From: Kirill Tkhai @ 2014-09-30 21:04 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Kirill Tkhai, Ingo Molnar, Juri Lelli

From: Kirill Tkhai <ktkhai@parallels.com>

We already reschedule env.dst_cpu in attach_tasks()->check_preempt_curr()
if this is necessary.

Furthermore, a higher priority class task may be current on dest rq,
we shouldn't disturb it.

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
---
 kernel/sched/fair.c |    6 ------
 1 file changed, 6 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 10a5a28..8265601 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6700,12 +6700,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 		local_irq_restore(flags);
 
-		/*
-		 * some other cpu did the load balance for us.
-		 */
-		if (cur_ld_moved && env.dst_cpu != smp_processor_id())
-			resched_cpu(env.dst_cpu);
-
 		if (env.flags & LBF_NEED_BREAK) {
 			env.flags &= ~LBF_NEED_BREAK;
 			goto more_balance;


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
  2014-09-30 21:04 [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl() Kirill Tkhai
  2014-09-30 21:04 ` [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl() Kirill Tkhai
  2014-09-30 21:04 ` [PATCH v2 3/3] sched/fair: Delete resched_cpu() from idle_balance() Kirill Tkhai
@ 2014-10-02  9:34 ` Peter Zijlstra
  2014-10-02 10:05   ` Kirill Tkhai
  2 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2014-10-02  9:34 UTC (permalink / raw)
  To: Kirill Tkhai; +Cc: linux-kernel, Kirill Tkhai, Ingo Molnar, Juri Lelli

On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
> From: Kirill Tkhai <ktkhai@parallels.com>
> 
> hrtimer_try_to_cancel() may bring a suprise, its call may fail.

Well, not really a surprise that, its a _try_ operation after all.

> raw_spin_lock(&rq->lock)
> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> ...                               raw_spin_lock(&rq->lock)   ...
>    switched_from_dl()             ...                        ...
>       hrtimer_try_to_cancel()     ...                        ...
>    switched_to_fair()             ...                        ...
> ...                               ...                        ...
> ...                               ...                        ...
> raw_spin_unlock(&rq->lock)        ...                        (asquired)
> ...                               ...                        ...
> ...                               ...                        ...
> do_exit()                         ...                        ...
>    schedule()                     ...                        ...
>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
>       ...                         ...                        ...
>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
>       ...                         ...                        (asquired)
>       put_task_struct()           ...                        ...
>           free_task_struct()      ...                        ...
>       ...                         ...                        raw_spin_unlock(&rq->lock)
> ...                               (asquired)                 ...
> ...                               ...                        ...
> ...                               Surprise!!!                ...
> 
> So, let's implement 100% guaranteed way to cancel the timer and let's
> be sure we are safe even in very unlikely situations.
> 
> We do not create any problem with rq unlocking, because it already
> may happed below in pull_dl_task(). No problem with deadline tasks
> balancing too.

That doesn't sound right. pull_dl_task() is an entirely different
callchain than switched_from(). Now it might still be fine, but you
cannot compare it with pull_dl_task.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl()
  2014-09-30 21:04 ` [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl() Kirill Tkhai
@ 2014-10-02  9:36   ` Peter Zijlstra
  2014-10-02  9:52     ` Kirill Tkhai
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2014-10-02  9:36 UTC (permalink / raw)
  To: Kirill Tkhai; +Cc: linux-kernel, Kirill Tkhai, Ingo Molnar, Juri Lelli

On Wed, Oct 01, 2014 at 01:04:35AM +0400, Kirill Tkhai wrote:
> From: Kirill Tkhai <ktkhai@parallels.com>
> 
> rq->curr task can't be in "dequeued" state in prio_changed_dl().
> (The only place we can have that is __schedule()). So, we delete
> rq->curr check.

the CBS timer can throttle it right?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl()
  2014-10-02  9:36   ` Peter Zijlstra
@ 2014-10-02  9:52     ` Kirill Tkhai
  2014-10-21 16:24       ` Juri Lelli
  0 siblings, 1 reply; 19+ messages in thread
From: Kirill Tkhai @ 2014-10-02  9:52 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

В Чт, 02/10/2014 в 11:36 +0200, Peter Zijlstra пишет:
> On Wed, Oct 01, 2014 at 01:04:35AM +0400, Kirill Tkhai wrote:
> > From: Kirill Tkhai <ktkhai@parallels.com>
> > 
> > rq->curr task can't be in "dequeued" state in prio_changed_dl().
> > (The only place we can have that is __schedule()). So, we delete
> > rq->curr check.
> 
> the CBS timer can throttle it right?

Yeah, it's better to check for on_dl_rq():

[PATCH] sched/dl: Cleanup prio_changed_dl()
    
rq->curr task can't be in "dequeued" state in prio_changed_dl().
(The only place we can have that is __schedule()). So, we delete
rq->curr check.

We shouldn't do balancing if deadline task is throttled too.

Also delete "else" branch which is dead code (switched_to_dl()
is not interested in dequeued tasks and we are not interested
in balancing in this case).
    
Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 63f8b4a..ccea917 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1638,35 +1638,33 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 			    int oldprio)
 {
-	if (task_on_rq_queued(p) || rq->curr == p) {
+	if (!on_dl_rq(&p->dl))
+		return;
 #ifdef CONFIG_SMP
-		/*
-		 * This might be too much, but unfortunately
-		 * we don't have the old deadline value, and
-		 * we can't argue if the task is increasing
-		 * or lowering its prio, so...
-		 */
-		if (!rq->dl.overloaded)
-			pull_dl_task(rq);
-
-		/*
-		 * If we now have a earlier deadline task than p,
-		 * then reschedule, provided p is still on this
-		 * runqueue.
-		 */
-		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
-		    rq->curr == p)
-			resched_curr(rq);
-#else
-		/*
-		 * Again, we don't know if p has a earlier
-		 * or later deadline, so let's blindly set a
-		 * (maybe not needed) rescheduling point.
-		 */
+	/*
+	 * This might be too much, but unfortunately
+	 * we don't have the old deadline value, and
+	 * we can't argue if the task is increasing
+	 * or lowering its prio, so...
+	 */
+	if (!rq->dl.overloaded)
+		pull_dl_task(rq);
+	/*
+	 * If we now have a earlier deadline task than p,
+	 * then reschedule, provided p is still on this
+	 * runqueue.
+	 */
+	if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
+	    rq->curr == p)
 		resched_curr(rq);
+#else
+	/*
+	 * Again, we don't know if p has a earlier
+	 * or later deadline, so let's blindly set a
+	 * (maybe not needed) rescheduling point.
+	 */
+	resched_curr(rq);
 #endif /* CONFIG_SMP */
-	} else
-		switched_to_dl(rq, p);
 }
 
 const struct sched_class dl_sched_class = {



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
  2014-10-02  9:34 ` [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl() Peter Zijlstra
@ 2014-10-02 10:05   ` Kirill Tkhai
  2014-10-21 10:30     ` Juri Lelli
  0 siblings, 1 reply; 19+ messages in thread
From: Kirill Tkhai @ 2014-10-02 10:05 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

В Чт, 02/10/2014 в 11:34 +0200, Peter Zijlstra пишет:
> On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
> > From: Kirill Tkhai <ktkhai@parallels.com>
> > 
> > hrtimer_try_to_cancel() may bring a suprise, its call may fail.
> 
> Well, not really a surprise that, its a _try_ operation after all.
> 
> > raw_spin_lock(&rq->lock)
> > ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> > ...                               raw_spin_lock(&rq->lock)   ...
> >    switched_from_dl()             ...                        ...
> >       hrtimer_try_to_cancel()     ...                        ...
> >    switched_to_fair()             ...                        ...
> > ...                               ...                        ...
> > ...                               ...                        ...
> > raw_spin_unlock(&rq->lock)        ...                        (asquired)
> > ...                               ...                        ...
> > ...                               ...                        ...
> > do_exit()                         ...                        ...
> >    schedule()                     ...                        ...
> >       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
> >       ...                         ...                        ...
> >       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
> >       ...                         ...                        (asquired)
> >       put_task_struct()           ...                        ...
> >           free_task_struct()      ...                        ...
> >       ...                         ...                        raw_spin_unlock(&rq->lock)
> > ...                               (asquired)                 ...
> > ...                               ...                        ...
> > ...                               Surprise!!!                ...
> > 
> > So, let's implement 100% guaranteed way to cancel the timer and let's
> > be sure we are safe even in very unlikely situations.
> > 
> > We do not create any problem with rq unlocking, because it already
> > may happed below in pull_dl_task(). No problem with deadline tasks
> > balancing too.
> 
> That doesn't sound right. pull_dl_task() is an entirely different
> callchain than switched_from(). Now it might still be fine, but you
> cannot compare it with pull_dl_task.

I mean that caller of switched_from_dl() already knows about this situation,
and we do not limit the area of its use.

Does this sound better?

[PATCH] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
    
Currently used hrtimer_try_to_cancel() is racy:

raw_spin_lock(&rq->lock)
...                            dl_task_timer                 raw_spin_lock(&rq->lock)
...                               raw_spin_lock(&rq->lock)   ...
   switched_from_dl()             ...                        ...
      hrtimer_try_to_cancel()     ...                        ...
   switched_to_fair()             ...                        ...
...                               ...                        ...
...                               ...                        ...
raw_spin_unlock(&rq->lock)        ...                        (acquired)
...                               ...                        ...
...                               ...                        ...
do_exit()                         ...                        ...
   schedule()                     ...                        ...
      raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
      ...                         ...                        ...
      raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
      ...                         ...                        (acquired)
      put_task_struct()           ...                        ...
          free_task_struct()      ...                        ...
      ...                         ...                        raw_spin_unlock(&rq->lock)
...                               (acquired)                 ...
...                               ...                        ...
...                               (use after free)           ...

    
So, let's implement 100% guaranteed way to cancel the timer and let's
be sure we are safe even in very unlikely situations.

rq unlocking does not limit the area of switched_from_dl() use, because
it already was possible in pull_dl_task() below.

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index abfaf3d..63f8b4a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	if (hrtimer_active(timer)) {
-		hrtimer_try_to_cancel(timer);
-		return;
-	}
-
 	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	timer->function = dl_task_timer;
 }
@@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
 
 #endif /* CONFIG_SMP */
 
+/*
+ *  Surely cancel task's dl_timer. May drop rq->lock.
+ */
+static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
+{
+	struct hrtimer *dl_timer = &p->dl.dl_timer;
+
+	/* Nobody will change task's class if pi_lock is held */
+	lockdep_assert_held(&p->pi_lock);
+
+	if (hrtimer_active(dl_timer)) {
+		int ret = hrtimer_try_to_cancel(dl_timer);
+
+		if (unlikely(ret == -1)) {
+			/*
+			 * Note, p may migrate OR new deadline tasks
+			 * may appear in rq when we are unlocking it.
+			 */
+			raw_spin_unlock(&rq->lock);
+			hrtimer_cancel(dl_timer);
+			raw_spin_lock(&rq->lock);
+		}
+	}
+}
+
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
-	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
-		hrtimer_try_to_cancel(&p->dl.dl_timer);
+	cancel_dl_timer(rq, p);
 
 	__dl_clear_params(p);
 



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [tip:sched/core] sched/fair: Delete resched_cpu() from idle_balance()
  2014-09-30 21:04 ` [PATCH v2 3/3] sched/fair: Delete resched_cpu() from idle_balance() Kirill Tkhai
@ 2014-10-03  5:28   ` tip-bot for Kirill Tkhai
  0 siblings, 0 replies; 19+ messages in thread
From: tip-bot for Kirill Tkhai @ 2014-10-03  5:28 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, ktkhai, hpa, mingo, peterz, tglx, juri.lelli

Commit-ID:  10a12983b3d437a6998b3845870e52c1c752c101
Gitweb:     http://git.kernel.org/tip/10a12983b3d437a6998b3845870e52c1c752c101
Author:     Kirill Tkhai <ktkhai@parallels.com>
AuthorDate: Wed, 1 Oct 2014 01:04:44 +0400
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 3 Oct 2014 05:46:56 +0200

sched/fair: Delete resched_cpu() from idle_balance()

We already reschedule env.dst_cpu in attach_tasks()->check_preempt_curr()
if this is necessary.

Furthermore, a higher priority class task may be current on dest rq,
we shouldn't disturb it.

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
Cc: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140930210441.5258.55054.stgit@localhost
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dfdcbfd..bd61cff 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6701,12 +6701,6 @@ more_balance:
 
 		local_irq_restore(flags);
 
-		/*
-		 * some other cpu did the load balance for us.
-		 */
-		if (cur_ld_moved && env.dst_cpu != smp_processor_id())
-			resched_cpu(env.dst_cpu);
-
 		if (env.flags & LBF_NEED_BREAK) {
 			env.flags &= ~LBF_NEED_BREAK;
 			goto more_balance;

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
  2014-10-02 10:05   ` Kirill Tkhai
@ 2014-10-21 10:30     ` Juri Lelli
  2014-10-21 10:48       ` Kirill Tkhai
  0 siblings, 1 reply; 19+ messages in thread
From: Juri Lelli @ 2014-10-21 10:30 UTC (permalink / raw)
  To: Kirill Tkhai, Peter Zijlstra
  Cc: Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

Hi Kirill,

sorry for the late reply, but I was busy doing other stuff and then
travelling.

On 02/10/14 11:05, Kirill Tkhai wrote:
> В Чт, 02/10/2014 в 11:34 +0200, Peter Zijlstra пишет:
>> On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
>>> From: Kirill Tkhai <ktkhai@parallels.com>
>>>
>>> hrtimer_try_to_cancel() may bring a suprise, its call may fail.
>>
>> Well, not really a surprise that, its a _try_ operation after all.
>>
>>> raw_spin_lock(&rq->lock)
>>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
>>> ...                               raw_spin_lock(&rq->lock)   ...
>>>    switched_from_dl()             ...                        ...
>>>       hrtimer_try_to_cancel()     ...                        ...
>>>    switched_to_fair()             ...                        ...
>>> ...                               ...                        ...
>>> ...                               ...                        ...
>>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
>>> ...                               ...                        ...
>>> ...                               ...                        ...
>>> do_exit()                         ...                        ...
>>>    schedule()                     ...                        ...
>>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
>>>       ...                         ...                        ...
>>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
>>>       ...                         ...                        (asquired)
>>>       put_task_struct()           ...                        ...
>>>           free_task_struct()      ...                        ...
>>>       ...                         ...                        raw_spin_unlock(&rq->lock)
>>> ...                               (asquired)                 ...
>>> ...                               ...                        ...
>>> ...                               Surprise!!!                ...
>>>
>>> So, let's implement 100% guaranteed way to cancel the timer and let's
>>> be sure we are safe even in very unlikely situations.
>>>
>>> We do not create any problem with rq unlocking, because it already
>>> may happed below in pull_dl_task(). No problem with deadline tasks
>>> balancing too.
>>
>> That doesn't sound right. pull_dl_task() is an entirely different
>> callchain than switched_from(). Now it might still be fine, but you
>> cannot compare it with pull_dl_task.
> 
> I mean that caller of switched_from_dl() already knows about this situation,
> and we do not limit the area of its use.
> 

Not sure what you mean with "the caller already knows...". Also, can you
detail more about the different callchains?

Do you have any test for this situation? Do you experienced any crash?
As you know, the replenishment timer is of key importance for us, and
I'd like to be 100% sure we don't introduce any problems with this
change :).

Thanks a lot,

- Juri

> Does this sound better?
> 
> [PATCH] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
>     
> Currently used hrtimer_try_to_cancel() is racy:
> 
> raw_spin_lock(&rq->lock)
> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> ...                               raw_spin_lock(&rq->lock)   ...
>    switched_from_dl()             ...                        ...
>       hrtimer_try_to_cancel()     ...                        ...
>    switched_to_fair()             ...                        ...
> ...                               ...                        ...
> ...                               ...                        ...
> raw_spin_unlock(&rq->lock)        ...                        (asquired)
> ...                               ...                        ...
> ...                               ...                        ...
> do_exit()                         ...                        ...
>    schedule()                     ...                        ...
>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
>       ...                         ...                        ...
>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
>       ...                         ...                        (asquired)
>       put_task_struct()           ...                        ...
>           free_task_struct()      ...                        ...
>       ...                         ...                        raw_spin_unlock(&rq->lock)
> ...                               (asquired)                 ...
> ...                               ...                        ...
> ...                               (use after free)           ...
> 
>     
> So, let's implement 100% guaranteed way to cancel the timer and let's
> be sure we are safe even in very unlikely situations.
> 
> rq unlocking does not limit the area of switched_from_dl() use, because
> it already was possible in pull_dl_task() below.
> 
> Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
> 
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index abfaf3d..63f8b4a 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
>  {
>  	struct hrtimer *timer = &dl_se->dl_timer;
>  
> -	if (hrtimer_active(timer)) {
> -		hrtimer_try_to_cancel(timer);
> -		return;
> -	}
> -
>  	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>  	timer->function = dl_task_timer;
>  }
> @@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
>  
>  #endif /* CONFIG_SMP */
>  
> +/*
> + *  Surely cancel task's dl_timer. May drop rq->lock.
> + */
> +static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
> +{
> +	struct hrtimer *dl_timer = &p->dl.dl_timer;
> +
> +	/* Nobody will change task's class if pi_lock is held */
> +	lockdep_assert_held(&p->pi_lock);
> +
> +	if (hrtimer_active(dl_timer)) {
> +		int ret = hrtimer_try_to_cancel(dl_timer);
> +
> +		if (unlikely(ret == -1)) {
> +			/*
> +			 * Note, p may migrate OR new deadline tasks
> +			 * may appear in rq when we are unlocking it.
> +			 */
> +			raw_spin_unlock(&rq->lock);
> +			hrtimer_cancel(dl_timer);
> +			raw_spin_lock(&rq->lock);
> +		}
> +	}
> +}
> +
>  static void switched_from_dl(struct rq *rq, struct task_struct *p)
>  {
> -	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
> -		hrtimer_try_to_cancel(&p->dl.dl_timer);
> +	cancel_dl_timer(rq, p);
>  
>  	__dl_clear_params(p);
>  
> 
> 
> 


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
  2014-10-21 10:30     ` Juri Lelli
@ 2014-10-21 10:48       ` Kirill Tkhai
  2014-10-21 11:41         ` Juri Lelli
  0 siblings, 1 reply; 19+ messages in thread
From: Kirill Tkhai @ 2014-10-21 10:48 UTC (permalink / raw)
  To: Juri Lelli
  Cc: Peter Zijlstra, Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

В Вт, 21/10/2014 в 11:30 +0100, Juri Lelli пишет:
> Hi Kirill,
> 
> sorry for the late reply, but I was busy doing other stuff and then
> travelling.
> 
> On 02/10/14 11:05, Kirill Tkhai wrote:
> > В Чт, 02/10/2014 в 11:34 +0200, Peter Zijlstra пишет:
> >> On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
> >>> From: Kirill Tkhai <ktkhai@parallels.com>
> >>>
> >>> hrtimer_try_to_cancel() may bring a suprise, its call may fail.
> >>
> >> Well, not really a surprise that, its a _try_ operation after all.
> >>
> >>> raw_spin_lock(&rq->lock)
> >>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> >>> ...                               raw_spin_lock(&rq->lock)   ...
> >>>    switched_from_dl()             ...                        ...
> >>>       hrtimer_try_to_cancel()     ...                        ...
> >>>    switched_to_fair()             ...                        ...
> >>> ...                               ...                        ...
> >>> ...                               ...                        ...
> >>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
> >>> ...                               ...                        ...
> >>> ...                               ...                        ...
> >>> do_exit()                         ...                        ...
> >>>    schedule()                     ...                        ...
> >>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
> >>>       ...                         ...                        ...
> >>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
> >>>       ...                         ...                        (asquired)
> >>>       put_task_struct()           ...                        ...
> >>>           free_task_struct()      ...                        ...
> >>>       ...                         ...                        raw_spin_unlock(&rq->lock)
> >>> ...                               (asquired)                 ...
> >>> ...                               ...                        ...
> >>> ...                               Surprise!!!                ...
> >>>
> >>> So, let's implement 100% guaranteed way to cancel the timer and let's
> >>> be sure we are safe even in very unlikely situations.
> >>>
> >>> We do not create any problem with rq unlocking, because it already
> >>> may happed below in pull_dl_task(). No problem with deadline tasks
> >>> balancing too.
> >>
> >> That doesn't sound right. pull_dl_task() is an entirely different
> >> callchain than switched_from(). Now it might still be fine, but you
> >> cannot compare it with pull_dl_task.
> > 
> > I mean that caller of switched_from_dl() already knows about this situation,
> > and we do not limit the area of its use.
> > 
> 
> Not sure what you mean with "the caller already knows...". Also, can you
> detail more about the different callchains?

We have only one caller of switched_from_dl(): check_class_changed().
This function doesn't assume that the lock is always held during its call.

What other details do you want?

> 
> Do you have any test for this situation? Do you experienced any crash?
> As you know, the replenishment timer is of key importance for us, and
> I'd like to be 100% sure we don't introduce any problems with this
> change :).

No, I haven't written any tests to reproduce this specific situation.
I found it by code analysis. It's the same way we fixed the problem
with the rq change in dl_task_timer():

    http://www.spinics.net/lists/stable/msg49080.html

Do you agree the race is here? It's my fix, and if it brings a problem
please clarify it.

I'm waiting for your reply.

Thanks,
Kirill

> > Does this sound better?
> > 
> > [PATCH] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
> >     
> > Currently used hrtimer_try_to_cancel() is racy:
> > 
> > raw_spin_lock(&rq->lock)
> > ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> > ...                               raw_spin_lock(&rq->lock)   ...
> >    switched_from_dl()             ...                        ...
> >       hrtimer_try_to_cancel()     ...                        ...
> >    switched_to_fair()             ...                        ...
> > ...                               ...                        ...
> > ...                               ...                        ...
> > raw_spin_unlock(&rq->lock)        ...                        (asquired)
> > ...                               ...                        ...
> > ...                               ...                        ...
> > do_exit()                         ...                        ...
> >    schedule()                     ...                        ...
> >       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
> >       ...                         ...                        ...
> >       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
> >       ...                         ...                        (asquired)
> >       put_task_struct()           ...                        ...
> >           free_task_struct()      ...                        ...
> >       ...                         ...                        raw_spin_unlock(&rq->lock)
> > ...                               (asquired)                 ...
> > ...                               ...                        ...
> > ...                               (use after free)           ...
> > 
> >     
> > So, let's implement 100% guaranteed way to cancel the timer and let's
> > be sure we are safe even in very unlikely situations.
> > 
> > rq unlocking does not limit the area of switched_from_dl() use, because
> > it already was possible in pull_dl_task() below.
> > 
> > Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
> > 
> > diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> > index abfaf3d..63f8b4a 100644
> > --- a/kernel/sched/deadline.c
> > +++ b/kernel/sched/deadline.c
> > @@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
> >  {
> >  	struct hrtimer *timer = &dl_se->dl_timer;
> >  
> > -	if (hrtimer_active(timer)) {
> > -		hrtimer_try_to_cancel(timer);
> > -		return;
> > -	}
> > -
> >  	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> >  	timer->function = dl_task_timer;
> >  }
> > @@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
> >  
> >  #endif /* CONFIG_SMP */
> >  
> > +/*
> > + *  Surely cancel task's dl_timer. May drop rq->lock.
> > + */
> > +static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
> > +{
> > +	struct hrtimer *dl_timer = &p->dl.dl_timer;
> > +
> > +	/* Nobody will change task's class if pi_lock is held */
> > +	lockdep_assert_held(&p->pi_lock);
> > +
> > +	if (hrtimer_active(dl_timer)) {
> > +		int ret = hrtimer_try_to_cancel(dl_timer);
> > +
> > +		if (unlikely(ret == -1)) {
> > +			/*
> > +			 * Note, p may migrate OR new deadline tasks
> > +			 * may appear in rq when we are unlocking it.
> > +			 */
> > +			raw_spin_unlock(&rq->lock);
> > +			hrtimer_cancel(dl_timer);
> > +			raw_spin_lock(&rq->lock);
> > +		}
> > +	}
> > +}
> > +
> >  static void switched_from_dl(struct rq *rq, struct task_struct *p)
> >  {
> > -	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
> > -		hrtimer_try_to_cancel(&p->dl.dl_timer);
> > +	cancel_dl_timer(rq, p);
> >  
> >  	__dl_clear_params(p);
> >  
> > 
> > 
> > 
> 



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
  2014-10-21 10:48       ` Kirill Tkhai
@ 2014-10-21 11:41         ` Juri Lelli
  2014-10-21 14:21           ` Kirill Tkhai
  0 siblings, 1 reply; 19+ messages in thread
From: Juri Lelli @ 2014-10-21 11:41 UTC (permalink / raw)
  To: Kirill Tkhai
  Cc: Peter Zijlstra, Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

On 21/10/14 11:48, Kirill Tkhai wrote:
> В Вт, 21/10/2014 в 11:30 +0100, Juri Lelli пишет:
>> Hi Kirill,
>>
>> sorry for the late reply, but I was busy doing other stuff and then
>> travelling.
>>
>> On 02/10/14 11:05, Kirill Tkhai wrote:
>>> В Чт, 02/10/2014 в 11:34 +0200, Peter Zijlstra пишет:
>>>> On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
>>>>> From: Kirill Tkhai <ktkhai@parallels.com>
>>>>>
>>>>> hrtimer_try_to_cancel() may bring a suprise, its call may fail.
>>>>
>>>> Well, not really a surprise that, its a _try_ operation after all.
>>>>
>>>>> raw_spin_lock(&rq->lock)
>>>>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
>>>>> ...                               raw_spin_lock(&rq->lock)   ...
>>>>>    switched_from_dl()             ...                        ...
>>>>>       hrtimer_try_to_cancel()     ...                        ...
>>>>>    switched_to_fair()             ...                        ...
>>>>> ...                               ...                        ...
>>>>> ...                               ...                        ...
>>>>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
>>>>> ...                               ...                        ...
>>>>> ...                               ...                        ...
>>>>> do_exit()                         ...                        ...
>>>>>    schedule()                     ...                        ...
>>>>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
>>>>>       ...                         ...                        ...
>>>>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
>>>>>       ...                         ...                        (asquired)
>>>>>       put_task_struct()           ...                        ...
>>>>>           free_task_struct()      ...                        ...
>>>>>       ...                         ...                        raw_spin_unlock(&rq->lock)
>>>>> ...                               (asquired)                 ...
>>>>> ...                               ...                        ...
>>>>> ...                               Surprise!!!                ...
>>>>>
>>>>> So, let's implement 100% guaranteed way to cancel the timer and let's
>>>>> be sure we are safe even in very unlikely situations.
>>>>>
>>>>> We do not create any problem with rq unlocking, because it already
>>>>> may happed below in pull_dl_task(). No problem with deadline tasks
>>>>> balancing too.
>>>>
>>>> That doesn't sound right. pull_dl_task() is an entirely different
>>>> callchain than switched_from(). Now it might still be fine, but you
>>>> cannot compare it with pull_dl_task.
>>>
>>> I mean that caller of switched_from_dl() already knows about this situation,
>>> and we do not limit the area of its use.
>>>
>>
>> Not sure what you mean with "the caller already knows...". Also, can you
>> detail more about the different callchains?
> 
> We have only caller of switched_from_dl(). It's check_class_changed().
> This function doesn't suppose that lock is always locked during its call.
> 
> What other details you want?
> 

Ok, now it's clearer, thanks. I was just wondering about what Peter
asked. If you can explain in more detail why we are still fine with it,
instead of just "it already was possible in pull_dl_task() below",
that would be nice to have.

Also, check_class_changed() is called from several places
(rt_mutex_setprio() for example), are we fine with all these
call sites as well?

>>
>> Do you have any test for this situation? Do you experienced any crash?
>> As you know, the replenishment timer is of key importance for us, and
>> I'd like to be 100% sure we don't introduce any problems with this
>> change :).
> 
> No, I haven't written any tests to reproduce namely this situation.
> I found it by code analyzing. The same way we fixed the problem
> with rq change in dl_task_timer():
> 
>     http://www.spinics.net/lists/stable/msg49080.html
>

Yeah, but I did write a test for that race:

 "Juri Lelli reports he got this race when dl_bandwidth_enabled()
  was not set."

And after that I felt more confident about the change :).

> Are you agree the race is here? It's my fix, and if brings a problem
> please clarify it.
> 

Yeah, it seems that the race may happen. I'm just saying that it would
be nice to see it happening before we fix the thing. I wish I had some
time to try to set up a test. Even though I can't spot any problems with
your patch, apart from the small comments below, not being completely
confident that this doesn't introduce a regression elsewhere brought me
to ask for more details.

> I'm waiting for your reply.
> 
> Thanks,
> Kirill
> 
>>> Does this sound better?
>>>
>>> [PATCH] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
>>>     
>>> Currently used hrtimer_try_to_cancel() is racy:
>>>
>>> raw_spin_lock(&rq->lock)
>>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
>>> ...                               raw_spin_lock(&rq->lock)   ...
>>>    switched_from_dl()             ...                        ...
>>>       hrtimer_try_to_cancel()     ...                        ...
>>>    switched_to_fair()             ...                        ...
>>> ...                               ...                        ...
>>> ...                               ...                        ...
>>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
>>> ...                               ...                        ...
>>> ...                               ...                        ...
>>> do_exit()                         ...                        ...
>>>    schedule()                     ...                        ...
>>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
>>>       ...                         ...                        ...
>>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
>>>       ...                         ...                        (asquired)
>>>       put_task_struct()           ...                        ...
>>>           free_task_struct()      ...                        ...
>>>       ...                         ...                        raw_spin_unlock(&rq->lock)
>>> ...                               (asquired)                 ...
>>> ...                               ...                        ...
>>> ...                               (use after free)           ...
>>>
>>>     
>>> So, let's implement 100% guaranteed way to cancel the timer and let's
>>> be sure we are safe even in very unlikely situations.
>>>
>>> rq unlocking does not limit the area of switched_from_dl() use, because
>>> it already was possible in pull_dl_task() below.
>>>
>>> Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
>>>
>>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>>> index abfaf3d..63f8b4a 100644
>>> --- a/kernel/sched/deadline.c
>>> +++ b/kernel/sched/deadline.c
>>> @@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
>>>  {
>>>  	struct hrtimer *timer = &dl_se->dl_timer;
>>>  
>>> -	if (hrtimer_active(timer)) {
>>> -		hrtimer_try_to_cancel(timer);
>>> -		return;
>>> -	}
>>> -
>>>  	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>>>  	timer->function = dl_task_timer;
>>>  }
>>> @@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
>>>  
>>>  #endif /* CONFIG_SMP */
>>>  
>>> +/*
>>> + *  Surely cancel task's dl_timer. May drop rq->lock.
>>> + */

Maybe we can add comments explaining why we are fine releasing the lock
here.

>>> +static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
>>> +{
>>> +	struct hrtimer *dl_timer = &p->dl.dl_timer;
>>> +
>>> +	/* Nobody will change task's class if pi_lock is held */
>>> +	lockdep_assert_held(&p->pi_lock);
>>> +
>>> +	if (hrtimer_active(dl_timer)) {
>>> +		int ret = hrtimer_try_to_cancel(dl_timer);
>>> +
>>> +		if (unlikely(ret == -1)) {
>>> +			/*
>>> +			 * Note, p may migrate OR new deadline tasks
>>> +			 * may appear in rq when we are unlocking it.
>>> +			 */

Yeah, some comments also here on why this is all good?

Thanks a lot Kirill!

Best,

- Juri

>>> +			raw_spin_unlock(&rq->lock);
>>> +			hrtimer_cancel(dl_timer);
>>> +			raw_spin_lock(&rq->lock);
>>> +		}
>>> +	}
>>> +}
>>> +
>>>  static void switched_from_dl(struct rq *rq, struct task_struct *p)
>>>  {
>>> -	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
>>> -		hrtimer_try_to_cancel(&p->dl.dl_timer);
>>> +	cancel_dl_timer(rq, p);
>>>  
>>>  	__dl_clear_params(p);
>>>  
>>>
>>>
>>>
>>
> 
> 
> 


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
  2014-10-21 11:41         ` Juri Lelli
@ 2014-10-21 14:21           ` Kirill Tkhai
  2014-10-22 10:00             ` Juri Lelli
  0 siblings, 1 reply; 19+ messages in thread
From: Kirill Tkhai @ 2014-10-21 14:21 UTC (permalink / raw)
  To: Juri Lelli
  Cc: Peter Zijlstra, Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

В Вт, 21/10/2014 в 12:41 +0100, Juri Lelli пишет:
> On 21/10/14 11:48, Kirill Tkhai wrote:
> > В Вт, 21/10/2014 в 11:30 +0100, Juri Lelli пишет:
> >> Hi Kirill,
> >>
> >> sorry for the late reply, but I was busy doing other stuff and then
> >> travelling.
> >>
> >> On 02/10/14 11:05, Kirill Tkhai wrote:
> >>> В Чт, 02/10/2014 в 11:34 +0200, Peter Zijlstra пишет:
> >>>> On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
> >>>>> From: Kirill Tkhai <ktkhai@parallels.com>
> >>>>>
> >>>>> hrtimer_try_to_cancel() may bring a suprise, its call may fail.
> >>>>
> >>>> Well, not really a surprise that, its a _try_ operation after all.
> >>>>
> >>>>> raw_spin_lock(&rq->lock)
> >>>>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> >>>>> ...                               raw_spin_lock(&rq->lock)   ...
> >>>>>    switched_from_dl()             ...                        ...
> >>>>>       hrtimer_try_to_cancel()     ...                        ...
> >>>>>    switched_to_fair()             ...                        ...
> >>>>> ...                               ...                        ...
> >>>>> ...                               ...                        ...
> >>>>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
> >>>>> ...                               ...                        ...
> >>>>> ...                               ...                        ...
> >>>>> do_exit()                         ...                        ...
> >>>>>    schedule()                     ...                        ...
> >>>>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
> >>>>>       ...                         ...                        ...
> >>>>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
> >>>>>       ...                         ...                        (asquired)
> >>>>>       put_task_struct()           ...                        ...
> >>>>>           free_task_struct()      ...                        ...
> >>>>>       ...                         ...                        raw_spin_unlock(&rq->lock)
> >>>>> ...                               (asquired)                 ...
> >>>>> ...                               ...                        ...
> >>>>> ...                               Surprise!!!                ...
> >>>>>
> >>>>> So, let's implement 100% guaranteed way to cancel the timer and let's
> >>>>> be sure we are safe even in very unlikely situations.
> >>>>>
> >>>>> We do not create any problem with rq unlocking, because it already
> >>>>> may happed below in pull_dl_task(). No problem with deadline tasks
> >>>>> balancing too.
> >>>>
> >>>> That doesn't sound right. pull_dl_task() is an entirely different
> >>>> callchain than switched_from(). Now it might still be fine, but you
> >>>> cannot compare it with pull_dl_task.
> >>>
> >>> I mean that caller of switched_from_dl() already knows about this situation,
> >>> and we do not limit the area of its use.
> >>>
> >>
> >> Not sure what you mean with "the caller already knows...". Also, can you
> >> detail more about the different callchains?
> > 
> > We have only caller of switched_from_dl(). It's check_class_changed().
> > This function doesn't suppose that lock is always locked during its call.
> > 
> > What other details you want?
> > 
> 
> Ok, now is more clear, thanks. I was just wondering about what Peter
> asked. If you can detail more about why we are still fine with it,
> instead that just "it already was possible in pull_dl_task() below",
> that would be nice to have.
> 
> Also, check_class_changed() is called from several places
> (rt_mutex_setprio() for example), are we fine with all this callplaces
> as well?

Yeah. The new code in the patch runs when hrtimer_try_to_cancel() fails.
This means the callback is running. In this case hrtimer_cancel() just
waits until the callback has finished.

Since we are in switched_from_dl(), the new class is not dl_sched_class and
the new prio is not less than MAX_DL_PRIO. So, the callback returns early just
after the !dl_task() check. After that hrtimer_cancel() returns back too.

The above is:

raw_spin_lock(rq->lock);                  ...
...                                       dl_task_timer()
...                                          raw_spin_lock(rq->lock);
   switched_from_dl()                        ...
       hrtimer_try_to_cancel()               ...
          raw_spin_unlock(rq->lock);         ...  
          hrtimer_cancel()                   ...
          ...                                raw_spin_unlock(rq->lock);
          ...                                return HRTIMER_NORESTART;
          ...                             ...
          raw_spin_lock(rq->lock);        ...   


But the below is also possible:
                                   dl_task_timer()
                                      raw_spin_lock(rq->lock);
                                      ...
                                      raw_spin_unlock(rq->lock);
raw_spin_lock(rq->lock);              ...
   switched_from_dl()                 ...
       hrtimer_try_to_cancel()        ...
       ...                            return HRTIMER_NORESTART;
       raw_spin_unlock(rq->lock);  ...
       hrtimer_cancel();           ...
       raw_spin_lock(rq->lock);    ...

In this case hrtimer_cancel() returns immediately. Very unlikely case,
just to mention.


Nobody can manipulate the task, because check_class_changed() is
always called with pi_lock locked. Nobody can force the task to
participate in (concurrent) priority inheritance schemes (the same reason).

All concurrent task operations require pi_lock, which is held by us.
No deadlocks with dl_task_timer() are possible, because it returns
right after !dl_task() check (it does nothing).

> >>
> >> Do you have any test for this situation? Do you experienced any crash?
> >> As you know, the replenishment timer is of key importance for us, and
> >> I'd like to be 100% sure we don't introduce any problems with this
> >> change :).
> > 
> > No, I haven't written any tests to reproduce namely this situation.
> > I found it by code analyzing. The same way we fixed the problem
> > with rq change in dl_task_timer():
> > 
> >     http://www.spinics.net/lists/stable/msg49080.html
> >
> 
> Yeah, but I did write a test for that race:
> 
>  "Juri Lelli reports he got this race when dl_bandwidth_enabled()
>   was not set."
> 
> And after that I felt more confident about the change :).

Ok, good. I forgot.

> > Are you agree the race is here? It's my fix, and if brings a problem
> > please clarify it.
> > 
> 
> Yeah, it seems that the race may happen. I'm just saying that it would
> be nice to see it happening before we fix the thing. I wish I have some
> time to try to setup a test. Even if I can't spot any problems with your
> patch, apart from small comments below, not being completely confident
> that this doesn't introduce regression elsewhere brought me to ask from
> more details.

Sadly, I have no time to write a test for this bug. I can change the comment
and add the description I posted above. Or I can add more description
if you say what else should be added.

> 
> > I'm waiting for your reply.
> > 
> > Thanks,
> > Kirill
> > 
> >>> Does this sound better?
> >>>
> >>> [PATCH] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
> >>>     
> >>> Currently used hrtimer_try_to_cancel() is racy:
> >>>
> >>> raw_spin_lock(&rq->lock)
> >>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> >>> ...                               raw_spin_lock(&rq->lock)   ...
> >>>    switched_from_dl()             ...                        ...
> >>>       hrtimer_try_to_cancel()     ...                        ...
> >>>    switched_to_fair()             ...                        ...
> >>> ...                               ...                        ...
> >>> ...                               ...                        ...
> >>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
> >>> ...                               ...                        ...
> >>> ...                               ...                        ...
> >>> do_exit()                         ...                        ...
> >>>    schedule()                     ...                        ...
> >>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
> >>>       ...                         ...                        ...
> >>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
> >>>       ...                         ...                        (asquired)
> >>>       put_task_struct()           ...                        ...
> >>>           free_task_struct()      ...                        ...
> >>>       ...                         ...                        raw_spin_unlock(&rq->lock)
> >>> ...                               (asquired)                 ...
> >>> ...                               ...                        ...
> >>> ...                               (use after free)           ...
> >>>
> >>>     
> >>> So, let's implement 100% guaranteed way to cancel the timer and let's
> >>> be sure we are safe even in very unlikely situations.
> >>>
> >>> rq unlocking does not limit the area of switched_from_dl() use, because
> >>> it already was possible in pull_dl_task() below.
> >>>
> >>> Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
> >>>
> >>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> >>> index abfaf3d..63f8b4a 100644
> >>> --- a/kernel/sched/deadline.c
> >>> +++ b/kernel/sched/deadline.c
> >>> @@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
> >>>  {
> >>>  	struct hrtimer *timer = &dl_se->dl_timer;
> >>>  
> >>> -	if (hrtimer_active(timer)) {
> >>> -		hrtimer_try_to_cancel(timer);
> >>> -		return;
> >>> -	}
> >>> -
> >>>  	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> >>>  	timer->function = dl_task_timer;
> >>>  }
> >>> @@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
> >>>  
> >>>  #endif /* CONFIG_SMP */
> >>>  
> >>> +/*
> >>> + *  Surely cancel task's dl_timer. May drop rq->lock.
> >>> + */
> 
> Maybe we can add comments explaining why we are fine releasing the lock
> here.
> 
> >>> +static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
> >>> +{
> >>> +	struct hrtimer *dl_timer = &p->dl.dl_timer;
> >>> +
> >>> +	/* Nobody will change task's class if pi_lock is held */
> >>> +	lockdep_assert_held(&p->pi_lock);
> >>> +
> >>> +	if (hrtimer_active(dl_timer)) {
> >>> +		int ret = hrtimer_try_to_cancel(dl_timer);
> >>> +
> >>> +		if (unlikely(ret == -1)) {
> >>> +			/*
> >>> +			 * Note, p may migrate OR new deadline tasks
> >>> +			 * may appear in rq when we are unlocking it.
> >>> +			 */
> 
> Yeah, some comments also here on why this is all good?
> 
> Thanks a lot Kirill!
> 
> Best,
> 
> - Juri
> 
> >>> +			raw_spin_unlock(&rq->lock);
> >>> +			hrtimer_cancel(dl_timer);
> >>> +			raw_spin_lock(&rq->lock);
> >>> +		}
> >>> +	}
> >>> +}
> >>> +
> >>>  static void switched_from_dl(struct rq *rq, struct task_struct *p)
> >>>  {
> >>> -	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
> >>> -		hrtimer_try_to_cancel(&p->dl.dl_timer);
> >>> +	cancel_dl_timer(rq, p);
> >>>  
> >>>  	__dl_clear_params(p);
> >>>  
> >>>
> >>>
> >>>
> >>
> > 
> > 
> > 
> 



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl()
  2014-10-02  9:52     ` Kirill Tkhai
@ 2014-10-21 16:24       ` Juri Lelli
  2014-10-21 16:33         ` Kirill Tkhai
  2014-10-23 23:04         ` Wanpeng Li
  0 siblings, 2 replies; 19+ messages in thread
From: Juri Lelli @ 2014-10-21 16:24 UTC (permalink / raw)
  To: Kirill Tkhai, Peter Zijlstra
  Cc: Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

Hi Kirill,

On 02/10/14 10:52, Kirill Tkhai wrote:
> В Чт, 02/10/2014 в 11:36 +0200, Peter Zijlstra пишет:
>> On Wed, Oct 01, 2014 at 01:04:35AM +0400, Kirill Tkhai wrote:
>>> From: Kirill Tkhai <ktkhai@parallels.com>
>>>
>>> rq->curr task can't be in "dequeued" state in prio_changed_dl().
>>> (The only place we can have that is __schedule()). So, we delete
>>> rq->curr check.
>>
>> the CBS timer can throttle it right?
> 
> Yeah, it's better to check for on_dl_rq():
> 
> [PATCH]sched/dl: Cleanup prio_changed_dl()
>     
> rq->curr task can't be in "dequeued" state in prio_changed_dl().
> (The only place we can have that is __schedule()). So, we delete
> rq->curr check.
> 
> We shouldn't do balancing if deadline task is throttled too.
> 
> Also delete "else" branch which is dead code (switched_to_dl()
> is not interested in dequeued tasks and we are not interested
> in balancing in this case).
> 

So, I agree that calling switched_to_dl() makes little sense,
but don't we have to deal with updates to not running tasks as
in rt.c? Something like this maybe?

>From 75ee75a5fd76526baaed3ba8a58f3ff7daa89cd6 Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@arm.com>
Date: Tue, 21 Oct 2014 17:15:15 +0100
Subject: [PATCH] sched/deadline: cleanup prio_changed_dl()

---
 kernel/sched/deadline.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 28d6088..1e62e31 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1661,7 +1661,10 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 			    int oldprio)
 {
-	if (task_on_rq_queued(p) || rq->curr == p) {
+	if (!on_dl_rq(&p->dl))
+		return;
+
+	if (rq->curr == p) {
 #ifdef CONFIG_SMP
 		/*
 		 * This might be too much, but unfortunately
@@ -1688,8 +1691,15 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 		 */
 		resched_curr(rq);
 #endif /* CONFIG_SMP */
-	} else
-		switched_to_dl(rq, p);
+	} else {
+		/*
+		 * This task is not running, so if its deadline is
+		 * now more imminent than that of the current running
+		 * task then reschedule.
+		 */
+		if (dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
+			resched_curr(rq);
+	}
 }
 
 const struct sched_class dl_sched_class = {
-- 
2.1.0

Thanks,

- Juri

> Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
> 
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 63f8b4a..ccea917 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1638,35 +1638,33 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>  static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>  			    int oldprio)
>  {
> -	if (task_on_rq_queued(p) || rq->curr == p) {
> +	if (!on_dl_rq(&p->dl))
> +		return;
>  #ifdef CONFIG_SMP
> -		/*
> -		 * This might be too much, but unfortunately
> -		 * we don't have the old deadline value, and
> -		 * we can't argue if the task is increasing
> -		 * or lowering its prio, so...
> -		 */
> -		if (!rq->dl.overloaded)
> -			pull_dl_task(rq);
> -
> -		/*
> -		 * If we now have a earlier deadline task than p,
> -		 * then reschedule, provided p is still on this
> -		 * runqueue.
> -		 */
> -		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
> -		    rq->curr == p)
> -			resched_curr(rq);
> -#else
> -		/*
> -		 * Again, we don't know if p has a earlier
> -		 * or later deadline, so let's blindly set a
> -		 * (maybe not needed) rescheduling point.
> -		 */
> +	/*
> +	 * This might be too much, but unfortunately
> +	 * we don't have the old deadline value, and
> +	 * we can't argue if the task is increasing
> +	 * or lowering its prio, so...
> +	 */
> +	if (!rq->dl.overloaded)
> +		pull_dl_task(rq);
> +	/*
> +	 * If we now have a earlier deadline task than p,
> +	 * then reschedule, provided p is still on this
> +	 * runqueue.
> +	 */
> +	if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
> +	    rq->curr == p)
>  		resched_curr(rq);
> +#else
> +	/*
> +	 * Again, we don't know if p has a earlier
> +	 * or later deadline, so let's blindly set a
> +	 * (maybe not needed) rescheduling point.
> +	 */
> +	resched_curr(rq);
>  #endif /* CONFIG_SMP */
> -	} else
> -		switched_to_dl(rq, p);
>  }
>  
>  const struct sched_class dl_sched_class = {
> 
> 
> 


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl()
  2014-10-21 16:24       ` Juri Lelli
@ 2014-10-21 16:33         ` Kirill Tkhai
  2014-10-22  9:33           ` Juri Lelli
  2014-10-23 23:04         ` Wanpeng Li
  1 sibling, 1 reply; 19+ messages in thread
From: Kirill Tkhai @ 2014-10-21 16:33 UTC (permalink / raw)
  To: Juri Lelli
  Cc: Peter Zijlstra, Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

В Вт, 21/10/2014 в 17:24 +0100, Juri Lelli пишет:
> Hi Kirill,
> 
> On 02/10/14 10:52, Kirill Tkhai wrote:
> > В Чт, 02/10/2014 в 11:36 +0200, Peter Zijlstra пишет:
> >> On Wed, Oct 01, 2014 at 01:04:35AM +0400, Kirill Tkhai wrote:
> >>> From: Kirill Tkhai <ktkhai@parallels.com>
> >>>
> >>> rq->curr task can't be in "dequeued" state in prio_changed_dl().
> >>> (The only place we can have that is __schedule()). So, we delete
> >>> rq->curr check.
> >>
> >> the CBS timer can throttle it right?
> > 
> > Yeah, it's better to check for on_dl_rq():
> > 
> > [PATCH]sched/dl: Cleanup prio_changed_dl()
> >     
> > rq->curr task can't be in "dequeued" state in prio_changed_dl().
> > (The only place we can have that is __schedule()). So, we delete
> > rq->curr check.
> > 
> > We shouldn't do balancing if deadline task is throttled too.
> > 
> > Also delete "else" branch which is dead code (switched_to_dl()
> > is not interested in dequeued tasks and we are not interested
> > in balancing in this case).
> > 
> 
> So, I agree that calling switched_to_dl() makes little sense,
> but don't we have to deal with updates to not running tasks as
> in rt.c? Something like this maybe?

Looks good. No objections from me.

> 
> From 75ee75a5fd76526baaed3ba8a58f3ff7daa89cd6 Mon Sep 17 00:00:00 2001
> From: Juri Lelli <juri.lelli@arm.com>
> Date: Tue, 21 Oct 2014 17:15:15 +0100
> Subject: [PATCH] sched/deadline: cleanup prio_changed_dl()
> 
> ---
>  kernel/sched/deadline.c | 16 +++++++++++++---
>  1 file changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 28d6088..1e62e31 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1661,7 +1661,10 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>  static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>  			    int oldprio)
>  {
> -	if (task_on_rq_queued(p) || rq->curr == p) {
> +	if (!on_dl_rq(&p->dl))
> +		return;
> +
> +	if (rq->curr == p) {
>  #ifdef CONFIG_SMP
>  		/*
>  		 * This might be too much, but unfortunately
> @@ -1688,8 +1691,15 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>  		 */
>  		resched_curr(rq);
>  #endif /* CONFIG_SMP */
> -	} else
> -		switched_to_dl(rq, p);
> +	} else {
> +		/*
> +		 * This task is not running, so if its deadline is
> +		 * now more imminent than that of the current running
> +		 * task then reschedule.
> +		 */
> +		if (dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
> +			resched_curr(rq);
> +	}
>  }
>  
>  const struct sched_class dl_sched_class = {



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl()
  2014-10-21 16:33         ` Kirill Tkhai
@ 2014-10-22  9:33           ` Juri Lelli
  0 siblings, 0 replies; 19+ messages in thread
From: Juri Lelli @ 2014-10-22  9:33 UTC (permalink / raw)
  To: Kirill Tkhai
  Cc: Peter Zijlstra, Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

On 21/10/14 17:33, Kirill Tkhai wrote:
> В Вт, 21/10/2014 в 17:24 +0100, Juri Lelli пишет:
>> Hi Kirill,
>>
>> On 02/10/14 10:52, Kirill Tkhai wrote:
>>> В Чт, 02/10/2014 в 11:36 +0200, Peter Zijlstra пишет:
>>>> On Wed, Oct 01, 2014 at 01:04:35AM +0400, Kirill Tkhai wrote:
>>>>> From: Kirill Tkhai <ktkhai@parallels.com>
>>>>>
>>>>> rq->curr task can't be in "dequeued" state in prio_changed_dl().
>>>>> (The only place we can have that is __schedule()). So, we delete
>>>>> rq->curr check.
>>>>
>>>> the CBS timer can throttle it right?
>>>
>>> Yeah, it's better to check for on_dl_rq():
>>>
>>> [PATCH]sched/dl: Cleanup prio_changed_dl()
>>>     
>>> rq->curr task can't be in "dequeued" state in prio_changed_dl().
>>> (The only place we can have that is __schedule()). So, we delete
>>> rq->curr check.
>>>
>>> We shouldn't do balancing if deadline task is throttled too.
>>>
>>> Also delete "else" branch which is dead code (switched_to_dl()
>>> is not interested in dequeued tasks and we are not interested
>>> in balancing in this case).
>>>
>>
>> So, I agree that calling switched_to_dl() makes little sense,
>> but don't we have to deal with updates to not running tasks as
>> in rt.c? Something like this maybe?
> 
> Looks good. No objections from me.
>

Ok, thanks Kirill. I'll send out a proper patch soon.

Best,

- Juri

>>
>> From 75ee75a5fd76526baaed3ba8a58f3ff7daa89cd6 Mon Sep 17 00:00:00 2001
>> From: Juri Lelli <juri.lelli@arm.com>
>> Date: Tue, 21 Oct 2014 17:15:15 +0100
>> Subject: [PATCH] sched/deadline: cleanup prio_changed_dl()
>>
>> ---
>>  kernel/sched/deadline.c | 16 +++++++++++++---
>>  1 file changed, 13 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index 28d6088..1e62e31 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -1661,7 +1661,10 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>>  static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>>  			    int oldprio)
>>  {
>> -	if (task_on_rq_queued(p) || rq->curr == p) {
>> +	if (!on_dl_rq(&p->dl))
>> +		return;
>> +
>> +	if (rq->curr == p) {
>>  #ifdef CONFIG_SMP
>>  		/*
>>  		 * This might be too much, but unfortunately
>> @@ -1688,8 +1691,15 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>>  		 */
>>  		resched_curr(rq);
>>  #endif /* CONFIG_SMP */
>> -	} else
>> -		switched_to_dl(rq, p);
>> +	} else {
>> +		/*
>> +		 * This task is not running, so if its deadline is
>> +		 * now more imminent than that of the current running
>> +		 * task then reschedule.
>> +		 */
>> +		if (dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
>> +			resched_curr(rq);
>> +	}
>>  }
>>  
>>  const struct sched_class dl_sched_class = {
> 
> 
> 


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
  2014-10-21 14:21           ` Kirill Tkhai
@ 2014-10-22 10:00             ` Juri Lelli
  2014-10-23  8:39               ` Kirill Tkhai
  0 siblings, 1 reply; 19+ messages in thread
From: Juri Lelli @ 2014-10-22 10:00 UTC (permalink / raw)
  To: Kirill Tkhai
  Cc: Peter Zijlstra, Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

On 21/10/14 15:21, Kirill Tkhai wrote:
> В Вт, 21/10/2014 в 12:41 +0100, Juri Lelli пишет:
>> On 21/10/14 11:48, Kirill Tkhai wrote:
>>> В Вт, 21/10/2014 в 11:30 +0100, Juri Lelli пишет:
>>>> Hi Kirill,
>>>>
>>>> sorry for the late reply, but I was busy doing other stuff and then
>>>> travelling.
>>>>
>>>> On 02/10/14 11:05, Kirill Tkhai wrote:
>>>>> В Чт, 02/10/2014 в 11:34 +0200, Peter Zijlstra пишет:
>>>>>> On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
>>>>>>> From: Kirill Tkhai <ktkhai@parallels.com>
>>>>>>>
>>>>>>> hrtimer_try_to_cancel() may bring a surprise: its call may fail.
>>>>>>
>>>>>> Well, not really a surprise that, its a _try_ operation after all.
>>>>>>
>>>>>>> raw_spin_lock(&rq->lock)
>>>>>>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
>>>>>>> ...                               raw_spin_lock(&rq->lock)   ...
>>>>>>>    switched_from_dl()             ...                        ...
>>>>>>>       hrtimer_try_to_cancel()     ...                        ...
>>>>>>>    switched_to_fair()             ...                        ...
>>>>>>> ...                               ...                        ...
>>>>>>> ...                               ...                        ...
>>>>>>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
>>>>>>> ...                               ...                        ...
>>>>>>> ...                               ...                        ...
>>>>>>> do_exit()                         ...                        ...
>>>>>>>    schedule()                     ...                        ...
>>>>>>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
>>>>>>>       ...                         ...                        ...
>>>>>>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
>>>>>>>       ...                         ...                        (asquired)
>>>>>>>       put_task_struct()           ...                        ...
>>>>>>>           free_task_struct()      ...                        ...
>>>>>>>       ...                         ...                        raw_spin_unlock(&rq->lock)
>>>>>>> ...                               (asquired)                 ...
>>>>>>> ...                               ...                        ...
>>>>>>> ...                               Surprise!!!                ...
>>>>>>>
>>>>>>> So, let's implement 100% guaranteed way to cancel the timer and let's
>>>>>>> be sure we are safe even in very unlikely situations.
>>>>>>>
>>>>>>> We do not create any problem with rq unlocking, because it already
>>>>>>> may happen below in pull_dl_task(). There is no problem with deadline
>>>>>>> task balancing either.
>>>>>>
>>>>>> That doesn't sound right. pull_dl_task() is an entirely different
>>>>>> callchain than switched_from(). Now it might still be fine, but you
>>>>>> cannot compare it with pull_dl_task.
>>>>>
>>>>> I mean that the caller of switched_from_dl() already knows about this
>>>>> situation, and we do not limit the area of its use.
>>>>>
>>>>
>>>> Not sure what you mean with "the caller already knows...". Also, can you
>>>> detail more about the different callchains?
>>>
>>> We have only one caller of switched_from_dl(): check_class_changed().
>>> This function doesn't assume that the lock is always held during its call.
>>>
>>> What other details you want?
>>>
>>
>> Ok, now is more clear, thanks. I was just wondering about what Peter
>> asked. If you can detail more about why we are still fine with it,
>> instead of just "it already was possible in pull_dl_task() below",
>> that would be nice to have.
>>
>> Also, check_class_changed() is called from several places
>> (rt_mutex_setprio() for example), are we fine with all this callplaces
>> as well?
> 
> Yeah. New code in the patch is working when hrtimer_try_to_cancel() fails.
> This means the callback is running. In this case hrtimer_cancel() is just
> waiting till the callback is finished.
> 
> Since we are in switched_from_dl(), new class is not dl_sched_class and
> new prio is not less MAX_DL_PRIO. So, the callback returns early just
> after !dl_task() check. After that hrtimer_cancel() returns back too.
> 
> The above is:
> 
> raw_spin_lock(rq->lock);                  ...
> ...                                       dl_task_timer()
> ...                                          raw_spin_lock(rq->lock);
>    switched_from_dl()                        ...
>        hrtimer_try_to_cancel()               ...
>           raw_spin_unlock(rq->lock);         ...  
>           hrtimer_cancel()                   ...
>           ...                                raw_spin_unlock(rq->lock);
>           ...                                return HRTIMER_NORESTART;
>           ...                             ...
>           raw_spin_lock(rq->lock);        ...   
> 
> 
> But the below is also possible:
>                                    dl_task_timer()
>                                       raw_spin_lock(rq->lock);
>                                       ...
>                                       raw_spin_unlock(rq->lock);
> raw_spin_lock(rq->lock);              ...
>    switched_from_dl()                 ...
>        hrtimer_try_to_cancel()        ...
>        ...                            return HRTIMER_NORESTART;
>        raw_spin_unlock(rq->lock);  ...
>        hrtimer_cancel();           ...
>        raw_spin_lock(rq->lock);    ...
> 
> In this case hrtimer_cancel() returns immediately. Very unlikely case,
> just to mention.
> 
> 
> Nobody can manipulate the task, because check_class_changed() is
> always called with pi_lock locked. Nobody can force the task to
> participate in (concurrent) priority inheritance schemes (the same reason).
> 
> All concurrent task operations require pi_lock, which is held by us.
> No deadlocks with dl_task_timer() are possible, because it returns
> right after !dl_task() check (it does nothing).
>

Ok, it looks right to me. It would be nice to have what above and the
original explanation of the bug in the changelog.

>>>>
>>>> Do you have any test for this situation? Do you experienced any crash?
>>>> As you know, the replenishment timer is of key importance for us, and
>>>> I'd like to be 100% sure we don't introduce any problems with this
>>>> change :).
>>>
>>> No, I haven't written any tests to reproduce namely this situation.
>>> I found it by code analyzing. The same way we fixed the problem
>>> with rq change in dl_task_timer():
>>>
>>>     http://www.spinics.net/lists/stable/msg49080.html
>>>
>>
>> Yeah, but I did write a test for that race:
>>
>>  "Juri Lelli reports he got this race when dl_bandwidth_enabled()
>>   was not set."
>>
>> And after that I felt more confident about the change :).
> 
> Ok, good. I forgot.
> 
>>> Do you agree the race is here? It's my fix, and if it brings a problem
>>> please clarify it.
>>>
>>
>> Yeah, it seems that the race may happen. I'm just saying that it would
>> be nice to see it happening before we fix the thing. I wish I have some
>> time to try to setup a test. Even if I can't spot any problems with your
>> patch, apart from small comments below, not being completely confident
>> that this doesn't introduce regression elsewhere brought me to ask from
>> more details.
> 
> Sadly, I have no time to write a test for this bug. I can change the comment
> and add the description I posted above. Or I can add more description
> if you say what should be added else.
> 

So, if you are ok with it, I'd say I can take some time to do a little
testing anyway, as the bug is there, but nobody (except you) noticed
that yet :).

>>
>>> I'm waiting for your reply.
>>>
>>> Thanks,
>>> Kirill
>>>
>>>>> Does this sound better?
>>>>>
>>>>> [PATCH] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
>>>>>     
>>>>> Currently used hrtimer_try_to_cancel() is racy:
>>>>>
>>>>> raw_spin_lock(&rq->lock)
>>>>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
>>>>> ...                               raw_spin_lock(&rq->lock)   ...
>>>>>    switched_from_dl()             ...                        ...
>>>>>       hrtimer_try_to_cancel()     ...                        ...
>>>>>    switched_to_fair()             ...                        ...
>>>>> ...                               ...                        ...
>>>>> ...                               ...                        ...
>>>>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
>>>>> ...                               ...                        ...
>>>>> ...                               ...                        ...
>>>>> do_exit()                         ...                        ...
>>>>>    schedule()                     ...                        ...
>>>>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
>>>>>       ...                         ...                        ...
>>>>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
>>>>>       ...                         ...                        (asquired)
>>>>>       put_task_struct()           ...                        ...
>>>>>           free_task_struct()      ...                        ...
>>>>>       ...                         ...                        raw_spin_unlock(&rq->lock)
>>>>> ...                               (asquired)                 ...
>>>>> ...                               ...                        ...
>>>>> ...                               (use after free)           ...
>>>>>
>>>>>     
>>>>> So, let's implement 100% guaranteed way to cancel the timer and let's
>>>>> be sure we are safe even in very unlikely situations.
>>>>>
>>>>> rq unlocking does not limit the area of switched_from_dl() use, because
>>>>> it already was possible in pull_dl_task() below.
>>>>>
>>>>> Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
>>>>>
>>>>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>>>>> index abfaf3d..63f8b4a 100644
>>>>> --- a/kernel/sched/deadline.c
>>>>> +++ b/kernel/sched/deadline.c
>>>>> @@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
>>>>>  {
>>>>>  	struct hrtimer *timer = &dl_se->dl_timer;
>>>>>  
>>>>> -	if (hrtimer_active(timer)) {
>>>>> -		hrtimer_try_to_cancel(timer);
>>>>> -		return;
>>>>> -	}
>>>>> -
>>>>>  	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>>>>>  	timer->function = dl_task_timer;
>>>>>  }
>>>>> @@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
>>>>>  
>>>>>  #endif /* CONFIG_SMP */
>>>>>  
>>>>> +/*
>>>>> + *  Surely cancel task's dl_timer. May drop rq->lock.
>>>>> + */
>>
>> Maybe we can add comments explaining why we are fine releasing the lock
>> here.
>>

Does "Ensure p's dl_timer is cancelled. May drop rq->lock." sound better?

>>>>> +static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
>>>>> +{
>>>>> +	struct hrtimer *dl_timer = &p->dl.dl_timer;
>>>>> +
>>>>> +	/* Nobody will change task's class if pi_lock is held */
>>>>> +	lockdep_assert_held(&p->pi_lock);
>>>>> +
>>>>> +	if (hrtimer_active(dl_timer)) {
>>>>> +		int ret = hrtimer_try_to_cancel(dl_timer);
>>>>> +
>>>>> +		if (unlikely(ret == -1)) {
>>>>> +			/*
>>>>> +			 * Note, p may migrate OR new deadline tasks
>>>>> +			 * may appear in rq when we are unlocking it.
>>>>> +			 */
>>
>> Yeah, some comments also here on why this is all good?
>>

Here you say what may happen. Can you add something saying why we are
fine with this happening? Just for future reference...

Thanks again!

Best,

- Juri

>> Thanks a lot Kirill!
>>
>> Best,
>>
>> - Juri
>>
>>>>> +			raw_spin_unlock(&rq->lock);
>>>>> +			hrtimer_cancel(dl_timer);
>>>>> +			raw_spin_lock(&rq->lock);
>>>>> +		}
>>>>> +	}
>>>>> +}
>>>>> +
>>>>>  static void switched_from_dl(struct rq *rq, struct task_struct *p)
>>>>>  {
>>>>> -	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
>>>>> -		hrtimer_try_to_cancel(&p->dl.dl_timer);
>>>>> +	cancel_dl_timer(rq, p);
>>>>>  
>>>>>  	__dl_clear_params(p);
>>>>>  
>>>>>
>>>>>
>>>>>
>>>>
>>>
>>>
>>>
>>
> 
> 
> 


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
  2014-10-22 10:00             ` Juri Lelli
@ 2014-10-23  8:39               ` Kirill Tkhai
  0 siblings, 0 replies; 19+ messages in thread
From: Kirill Tkhai @ 2014-10-23  8:39 UTC (permalink / raw)
  To: Juri Lelli
  Cc: Peter Zijlstra, Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

В Ср, 22/10/2014 в 11:00 +0100, Juri Lelli пишет:
> On 21/10/14 15:21, Kirill Tkhai wrote:
> > В Вт, 21/10/2014 в 12:41 +0100, Juri Lelli пишет:
> >> On 21/10/14 11:48, Kirill Tkhai wrote:
> >>> В Вт, 21/10/2014 в 11:30 +0100, Juri Lelli пишет:
> >>>> Hi Kirill,
> >>>>
> >>>> sorry for the late reply, but I was busy doing other stuff and then
> >>>> travelling.
> >>>>
> >>>> On 02/10/14 11:05, Kirill Tkhai wrote:
> >>>>> В Чт, 02/10/2014 в 11:34 +0200, Peter Zijlstra пишет:
> >>>>>> On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
> >>>>>>> From: Kirill Tkhai <ktkhai@parallels.com>
> >>>>>>>
> >>>>>>> hrtimer_try_to_cancel() may bring a suprise, its call may fail.
> >>>>>>
> >>>>>> Well, not really a surprise that, its a _try_ operation after all.
> >>>>>>
> >>>>>>> raw_spin_lock(&rq->lock)
> >>>>>>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> >>>>>>> ...                               raw_spin_lock(&rq->lock)   ...
> >>>>>>>    switched_from_dl()             ...                        ...
> >>>>>>>       hrtimer_try_to_cancel()     ...                        ...
> >>>>>>>    switched_to_fair()             ...                        ...
> >>>>>>> ...                               ...                        ...
> >>>>>>> ...                               ...                        ...
> >>>>>>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
> >>>>>>> ...                               ...                        ...
> >>>>>>> ...                               ...                        ...
> >>>>>>> do_exit()                         ...                        ...
> >>>>>>>    schedule()                     ...                        ...
> >>>>>>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
> >>>>>>>       ...                         ...                        ...
> >>>>>>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
> >>>>>>>       ...                         ...                        (asquired)
> >>>>>>>       put_task_struct()           ...                        ...
> >>>>>>>           free_task_struct()      ...                        ...
> >>>>>>>       ...                         ...                        raw_spin_unlock(&rq->lock)
> >>>>>>> ...                               (asquired)                 ...
> >>>>>>> ...                               ...                        ...
> >>>>>>> ...                               Surprise!!!                ...
> >>>>>>>
> >>>>>>> So, let's implement 100% guaranteed way to cancel the timer and let's
> >>>>>>> be sure we are safe even in very unlikely situations.
> >>>>>>>
> >>>>>>> We do not create any problem with rq unlocking, because it already
> >>>>>>> may happed below in pull_dl_task(). No problem with deadline tasks
> >>>>>>> balancing too.
> >>>>>>
> >>>>>> That doesn't sound right. pull_dl_task() is an entirely different
> >>>>>> callchain than switched_from(). Now it might still be fine, but you
> >>>>>> cannot compare it with pull_dl_task.
> >>>>>
> >>>>> I mean that caller of switched_from_dl() already knows about this situation,
> >>>>> and we do not limit the area of its use.
> >>>>>
> >>>>
> >>>> Not sure what you mean with "the caller already knows...". Also, can you
> >>>> detail more about the different callchains?
> >>>
> >>> We have only caller of switched_from_dl(). It's check_class_changed().
> >>> This function doesn't suppose that lock is always locked during its call.
> >>>
> >>> What other details you want?
> >>>
> >>
> >> Ok, now is more clear, thanks. I was just wondering about what Peter
> >> asked. If you can detail more about why we are still fine with it,
> >> instead that just "it already was possible in pull_dl_task() below",
> >> that would be nice to have.
> >>
> >> Also, check_class_changed() is called from several places
> >> (rt_mutex_setprio() for example), are we fine with all this callplaces
> >> as well?
> > 
> > Yeah. New code in the patch is working when hrtimer_try_to_cancel() fails.
> > This means the callback is running. In this case hrtimer_cancel() is just
> > waiting till the callback is finished.
> > 
> > Since we are in switched_from_dl(), new class is not dl_sched_class and
> > new prio is not less MAX_DL_PRIO. So, the callback returns early just
> > after !dl_task() check. After that hrtimer_cancel() returns back too.
> > 
> > The above is:
> > 
> > raw_spin_lock(rq->lock);                  ...
> > ...                                       dl_task_timer()
> > ...                                          raw_spin_lock(rq->lock);
> >    switched_from_dl()                        ...
> >        hrtimer_try_to_cancel()               ...
> >           raw_spin_unlock(rq->lock);         ...  
> >           hrtimer_cancel()                   ...
> >           ...                                raw_spin_unlock(rq->lock);
> >           ...                                return HRTIMER_NORESTART;
> >           ...                             ...
> >           raw_spin_lock(rq->lock);        ...   
> > 
> > 
> > But the below is also possible:
> >                                    dl_task_timer()
> >                                       raw_spin_lock(rq->lock);
> >                                       ...
> >                                       raw_spin_unlock(rq->lock);
> > raw_spin_lock(rq->lock);              ...
> >    switched_from_dl()                 ...
> >        hrtimer_try_to_cancel()        ...
> >        ...                            return HRTIMER_NORESTART;
> >        raw_spin_unlock(rq->lock);  ...
> >        hrtimer_cancel();           ...
> >        raw_spin_lock(rq->lock);    ...
> > 
> > In this case hrtimer_cancel() returns immediately. Very unlikely case,
> > just to mention.
> > 
> > 
> > Nobody can manipulate the task, because check_class_changed() is
> > always called with pi_lock locked. Nobody can force the task to
> > participate in (concurrent) priority inheritance schemes (the same reason).
> > 
> > All concurrent task operations require pi_lock, which is held by us.
> > No deadlocks with dl_task_timer() are possible, because it returns
> > right after !dl_task() check (it does nothing).
> >
> 
> Ok, it looks right to me. It would be nice to have what above and the
> original explanation of the bug in the changelog.

I'll send new patch with your remarks.

> >>>>
> >>>> Do you have any test for this situation? Do you experienced any crash?
> >>>> As you know, the replenishment timer is of key importance for us, and
> >>>> I'd like to be 100% sure we don't introduce any problems with this
> >>>> change :).
> >>>
> >>> No, I haven't written any tests to reproduce namely this situation.
> >>> I found it by code analyzing. The same way we fixed the problem
> >>> with rq change in dl_task_timer():
> >>>
> >>>     http://www.spinics.net/lists/stable/msg49080.html
> >>>
> >>
> >> Yeah, but I did write a test for that race:
> >>
> >>  "Juri Lelli reports he got this race when dl_bandwidth_enabled()
> >>   was not set."
> >>
> >> And after that I felt more confident about the change :).
> > 
> > Ok, good. I forgot.
> > 
> >>> Are you agree the race is here? It's my fix, and if brings a problem
> >>> please clarify it.
> >>>
> >>
> >> Yeah, it seems that the race may happen. I'm just saying that it would
> >> be nice to see it happening before we fix the thing. I wish I have some
> >> time to try to setup a test. Even if I can't spot any problems with your
> >> patch, apart from small comments below, not being completely confident
> >> that this doesn't introduce regression elsewhere brought me to ask from
> >> more details.
> > 
> > Sadly, I have no time to write a test for this bug. I can change the comment
> > and add the description I posted above. Or I can add more description
> > if you say what should be added else.
> > 
> 
> So, if you are ok with it, I'd say I can take some time to do a little
> testing anyway, as the bug is there, but nobody (except you) noticed
> that yet :).
> 
> >>
> >>> I'm waiting for your reply.
> >>>
> >>> Thanks,
> >>> Kirill
> >>>
> >>>>> Does this sound better?
> >>>>>
> >>>>> [PATCH] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
> >>>>>     
> >>>>> Currently used hrtimer_try_to_cancel() is racy:
> >>>>>
> >>>>> raw_spin_lock(&rq->lock)
> >>>>> ...                            dl_task_timer                 raw_spin_lock(&rq->lock)
> >>>>> ...                               raw_spin_lock(&rq->lock)   ...
> >>>>>    switched_from_dl()             ...                        ...
> >>>>>       hrtimer_try_to_cancel()     ...                        ...
> >>>>>    switched_to_fair()             ...                        ...
> >>>>> ...                               ...                        ...
> >>>>> ...                               ...                        ...
> >>>>> raw_spin_unlock(&rq->lock)        ...                        (asquired)
> >>>>> ...                               ...                        ...
> >>>>> ...                               ...                        ...
> >>>>> do_exit()                         ...                        ...
> >>>>>    schedule()                     ...                        ...
> >>>>>       raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
> >>>>>       ...                         ...                        ...
> >>>>>       raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
> >>>>>       ...                         ...                        (asquired)
> >>>>>       put_task_struct()           ...                        ...
> >>>>>           free_task_struct()      ...                        ...
> >>>>>       ...                         ...                        raw_spin_unlock(&rq->lock)
> >>>>> ...                               (asquired)                 ...
> >>>>> ...                               ...                        ...
> >>>>> ...                               (use after free)           ...
> >>>>>
> >>>>>     
> >>>>> So, let's implement 100% guaranteed way to cancel the timer and let's
> >>>>> be sure we are safe even in very unlikely situations.
> >>>>>
> >>>>> rq unlocking does not limit the area of switched_from_dl() use, because
> >>>>> it already was possible in pull_dl_task() below.
> >>>>>
> >>>>> Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
> >>>>>
> >>>>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> >>>>> index abfaf3d..63f8b4a 100644
> >>>>> --- a/kernel/sched/deadline.c
> >>>>> +++ b/kernel/sched/deadline.c
> >>>>> @@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
> >>>>>  {
> >>>>>  	struct hrtimer *timer = &dl_se->dl_timer;
> >>>>>  
> >>>>> -	if (hrtimer_active(timer)) {
> >>>>> -		hrtimer_try_to_cancel(timer);
> >>>>> -		return;
> >>>>> -	}
> >>>>> -
> >>>>>  	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> >>>>>  	timer->function = dl_task_timer;
> >>>>>  }
> >>>>> @@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
> >>>>>  
> >>>>>  #endif /* CONFIG_SMP */
> >>>>>  
> >>>>> +/*
> >>>>> + *  Surely cancel task's dl_timer. May drop rq->lock.
> >>>>> + */
> >>
> >> Maybe we can add comments explaining why we are fine releasing the lock
> >> here.
> >>
> 
> Does "Ensure p's dl_timer is cancelled. May drop rq->lock." sound better?
> 
> >>>>> +static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
> >>>>> +{
> >>>>> +	struct hrtimer *dl_timer = &p->dl.dl_timer;
> >>>>> +
> >>>>> +	/* Nobody will change task's class if pi_lock is held */
> >>>>> +	lockdep_assert_held(&p->pi_lock);
> >>>>> +
> >>>>> +	if (hrtimer_active(dl_timer)) {
> >>>>> +		int ret = hrtimer_try_to_cancel(dl_timer);
> >>>>> +
> >>>>> +		if (unlikely(ret == -1)) {
> >>>>> +			/*
> >>>>> +			 * Note, p may migrate OR new deadline tasks
> >>>>> +			 * may appear in rq when we are unlocking it.
> >>>>> +			 */
> >>
> >> Yeah, some comments also here on why this is all good?
> >>
> 
> Here you say what may happen. Can you add something saying why we are
> fine with this happening? Just for future reference...

Thanks!
Kirill


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl()
  2014-10-21 16:24       ` Juri Lelli
  2014-10-21 16:33         ` Kirill Tkhai
@ 2014-10-23 23:04         ` Wanpeng Li
  2014-10-24  9:26           ` Juri Lelli
  1 sibling, 1 reply; 19+ messages in thread
From: Wanpeng Li @ 2014-10-23 23:04 UTC (permalink / raw)
  To: Juri Lelli, Kirill Tkhai, Peter Zijlstra
  Cc: Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

10/22/14, 12:24 AM, Juri Lelli:
> Hi Kirill,
>
> On 02/10/14 10:52, Kirill Tkhai wrote:
>> В Чт, 02/10/2014 в 11:36 +0200, Peter Zijlstra пишет:
>>> On Wed, Oct 01, 2014 at 01:04:35AM +0400, Kirill Tkhai wrote:
>>>> From: Kirill Tkhai <ktkhai@parallels.com>
>>>>
>>>> rq->curr task can't be in "dequeued" state in prio_changed_dl().
>>>> (The only place we can have that is __schedule()). So, we delete
>>>> rq->curr check.
>>> the CBS timer can throttle it right?
>> Yeah, it's better to check for on_dl_rq():
>>
>> [PATCH]sched/dl: Cleanup prio_changed_dl()
>>      
>> rq->curr task can't be in "dequeued" state in prio_changed_dl().
>> (The only place we can have that is __schedule()). So, we delete
>> rq->curr check.
>>
>> We shouldn't do balancing if deadline task is throttled too.
>>
>> Also delete "else" branch which is dead code (switched_to_dl()
>> is not interested in dequeued tasks and we are not interested
>> in balancing in this case).
>>
> So, I agree that calling switched_to_dl() makes little sense,
> but don't we have to deal with updates to not running tasks as
> in rt.c? Something like this maybe?
>
>  From 75ee75a5fd76526baaed3ba8a58f3ff7daa89cd6 Mon Sep 17 00:00:00 2001
> From: Juri Lelli <juri.lelli@arm.com>
> Date: Tue, 21 Oct 2014 17:15:15 +0100
> Subject: [PATCH] sched/deadline: cleanup prio_changed_dl()
>
> ---
>   kernel/sched/deadline.c | 16 +++++++++++++---
>   1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 28d6088..1e62e31 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1661,7 +1661,10 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>   static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>   			    int oldprio)
>   {
> -	if (task_on_rq_queued(p) || rq->curr == p) {
> +	if (!on_dl_rq(&p->dl))

I'm not sure if this should be a task_on_rq_queued() check. All 
check_class_changed() callsites dequeue the entity if task_on_rq_queued() is 
true, which leads to on_dl_rq(&p->dl) always returning false.

Regards,
Wanpeng Li

> +		return;
> +
> +	if (rq->curr == p) {
>   #ifdef CONFIG_SMP
>   		/*
>   		 * This might be too much, but unfortunately
> @@ -1688,8 +1691,15 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>   		 */
>   		resched_curr(rq);
>   #endif /* CONFIG_SMP */
> -	} else
> -		switched_to_dl(rq, p);
> +	} else {
> +		/*
> +		 * This task is not running, so if its deadline is
> +		 * now more imminent than that of the current running
> +		 * task then reschedule.
> +		 */
> +		if (dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
> +			resched_curr(rq);
> +	}
>   }
>   
>   const struct sched_class dl_sched_class = {


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl()
  2014-10-23 23:04         ` Wanpeng Li
@ 2014-10-24  9:26           ` Juri Lelli
  0 siblings, 0 replies; 19+ messages in thread
From: Juri Lelli @ 2014-10-24  9:26 UTC (permalink / raw)
  To: Wanpeng Li, Kirill Tkhai, Peter Zijlstra
  Cc: Kirill Tkhai, linux-kernel, Ingo Molnar, Juri Lelli

Hi,

On 24/10/14 00:04, Wanpeng Li wrote:
> 10/22/14, 12:24 AM, Juri Lelli:
>> Hi Kirill,
>>
>> On 02/10/14 10:52, Kirill Tkhai wrote:
>>> В Чт, 02/10/2014 в 11:36 +0200, Peter Zijlstra пишет:
>>>> On Wed, Oct 01, 2014 at 01:04:35AM +0400, Kirill Tkhai wrote:
>>>>> From: Kirill Tkhai <ktkhai@parallels.com>
>>>>>
>>>>> rq->curr task can't be in "dequeued" state in prio_changed_dl().
>>>>> (The only place we can have that is __schedule()). So, we delete
>>>>> rq->curr check.
>>>> the CBS timer can throttle it right?
>>> Yeah, it's better to check for on_dl_rq():
>>>
>>> [PATCH]sched/dl: Cleanup prio_changed_dl()
>>>      
>>> rq->curr task can't be in "dequeued" state in prio_changed_dl().
>>> (The only place we can have that is __schedule()). So, we delete
>>> rq->curr check.
>>>
>>> We shouldn't do balancing if deadline task is throttled too.
>>>
>>> Also delete "else" branch which is dead code (switched_to_dl()
>>> is not interested in dequeued tasks and we are not interested
>>> in balancing in this case).
>>>
>> So, I agree that calling switched_to_dl() makes little sense,
>> but don't we have to deal with updates to not running tasks as
>> in rt.c? Something like this maybe?
>>
>>  From 75ee75a5fd76526baaed3ba8a58f3ff7daa89cd6 Mon Sep 17 00:00:00 2001
>> From: Juri Lelli <juri.lelli@arm.com>
>> Date: Tue, 21 Oct 2014 17:15:15 +0100
>> Subject: [PATCH] sched/deadline: cleanup prio_changed_dl()
>>
>> ---
>>   kernel/sched/deadline.c | 16 +++++++++++++---
>>   1 file changed, 13 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index 28d6088..1e62e31 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -1661,7 +1661,10 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>>   static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>>   			    int oldprio)
>>   {
>> -	if (task_on_rq_queued(p) || rq->curr == p) {
>> +	if (!on_dl_rq(&p->dl))
> 
> I'm not sure if this should be task_on_rq_queued() check. All 
> check_class_changed() callsites dequeue entity if task_on_rq_queued() is 
> true which leads to on_dl_rq(&p->dl) always return false.
> 

Yes, to be able to change class/prio. But they also enqueue it back if
it was on_rq. So, on_dl_rq() helps us when the task is throttled.

Thanks,

- Juri

> Regards,
> Wanpeng Li
> 
>> +		return;
>> +
>> +	if (rq->curr == p) {
>>   #ifdef CONFIG_SMP
>>   		/*
>>   		 * This might be too much, but unfortunately
>> @@ -1688,8 +1691,15 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>>   		 */
>>   		resched_curr(rq);
>>   #endif /* CONFIG_SMP */
>> -	} else
>> -		switched_to_dl(rq, p);
>> +	} else {
>> +		/*
>> +		 * This task is not running, so if its deadline is
>> +		 * now more imminent than that of the current running
>> +		 * task then reschedule.
>> +		 */
>> +		if (dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
>> +			resched_curr(rq);
>> +	}
>>   }
>>   
>>   const struct sched_class dl_sched_class = {
> 
> 


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2014-10-24  9:26 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-30 21:04 [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl() Kirill Tkhai
2014-09-30 21:04 ` [PATCH v2 2/3] sched/dl: Cleanup prio_changed_dl() Kirill Tkhai
2014-10-02  9:36   ` Peter Zijlstra
2014-10-02  9:52     ` Kirill Tkhai
2014-10-21 16:24       ` Juri Lelli
2014-10-21 16:33         ` Kirill Tkhai
2014-10-22  9:33           ` Juri Lelli
2014-10-23 23:04         ` Wanpeng Li
2014-10-24  9:26           ` Juri Lelli
2014-09-30 21:04 ` [PATCH v2 3/3] sched/fair: Delete resched_cpu() from idle_balance() Kirill Tkhai
2014-10-03  5:28   ` [tip:sched/core] " tip-bot for Kirill Tkhai
2014-10-02  9:34 ` [PATCH v2 1/3] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl() Peter Zijlstra
2014-10-02 10:05   ` Kirill Tkhai
2014-10-21 10:30     ` Juri Lelli
2014-10-21 10:48       ` Kirill Tkhai
2014-10-21 11:41         ` Juri Lelli
2014-10-21 14:21           ` Kirill Tkhai
2014-10-22 10:00             ` Juri Lelli
2014-10-23  8:39               ` Kirill Tkhai

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.