From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755401AbbBQMbp (ORCPT ); Tue, 17 Feb 2015 07:31:45 -0500 Received: from casper.infradead.org ([85.118.1.10]:60026 "EHLO casper.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752459AbbBQMbo (ORCPT ); Tue, 17 Feb 2015 07:31:44 -0500 Date: Tue, 17 Feb 2015 13:31:39 +0100 From: Peter Zijlstra To: Kirill Tkhai Cc: linux-kernel@vger.kernel.org, Ingo Molnar , Josh Poimboeuf Subject: [PATCH] sched: Make dl_task_time() use task_rq_lock() Message-ID: <20150217123139.GN5029@twins.programming.kicks-ass.net> References: <1424170011.5749.20.camel@tkhai> <20150217111116.GL5029@twins.programming.kicks-ass.net> <20150217112616.GU24151@twins.programming.kicks-ass.net> <1424172838.5749.27.camel@tkhai> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1424172838.5749.27.camel@tkhai> User-Agent: Mutt/1.5.21 (2012-12-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Tue, Feb 17, 2015 at 02:33:58PM +0300, Kirill Tkhai wrote: > So, we move task_rq_lock() to sched.h, and dl_task_timer() uses it? Yep, like this. I've also modified your earlier patch to dl_task_time() back to its original form. --- Subject: sched: Make dl_task_time() use task_rq_lock() From: Peter Zijlstra Date: Tue Feb 17 13:22:25 CET 2015 Kirill reported that a dl task can be throttled and dequeued at the same time. This happens, when it becomes throttled in schedule(), which is called to go to sleep: current->state = TASK_INTERRUPTIBLE; schedule() deactivate_task() dequeue_task_dl() update_curr_dl() start_dl_timer() __dequeue_task_dl() prev->on_rq = 0; This invalidates the assumption from commit 0f397f2c90ce ("sched/dl: Fix race in dl_task_timer()"): "The only reason we don't strictly need ->pi_lock now is because we're guaranteed to have p->state == TASK_RUNNING here and are thus free of ttwu races". And therefore we have to use the full task_rq_lock() here. This further amends the fact that we forgot to update the rq lock loop for TASK_ON_RQ_MIGRATE, from commit cca26e8009d1 ("sched: Teach scheduler to understand TASK_ON_RQ_MIGRATING state"). Reported-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/core.c | 76 ------------------------------------------------ kernel/sched/deadline.c | 12 +------ kernel/sched/sched.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 85 deletions(-) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -307,82 +307,6 @@ __read_mostly int scheduler_running; int sysctl_sched_rt_runtime = 950000; /* - * __task_rq_lock - lock the rq @p resides on. - */ -static inline struct rq *__task_rq_lock(struct task_struct *p) - __acquires(rq->lock) -{ - struct rq *rq; - - lockdep_assert_held(&p->pi_lock); - - for (;;) { - rq = task_rq(p); - raw_spin_lock(&rq->lock); - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) - return rq; - raw_spin_unlock(&rq->lock); - - while (unlikely(task_on_rq_migrating(p))) - cpu_relax(); - } -} - -/* - * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. - */ -static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) - __acquires(p->pi_lock) - __acquires(rq->lock) -{ - struct rq *rq; - - for (;;) { - raw_spin_lock_irqsave(&p->pi_lock, *flags); - rq = task_rq(p); - raw_spin_lock(&rq->lock); - /* - * move_queued_task() task_rq_lock() - * - * ACQUIRE (rq->lock) - * [S] ->on_rq = MIGRATING [L] rq = task_rq() - * WMB (__set_task_cpu()) ACQUIRE (rq->lock); - * [S] ->cpu = new_cpu [L] task_rq() - * [L] ->on_rq - * RELEASE (rq->lock) - * - * If we observe the old cpu in task_rq_lock, the acquire of - * the old rq->lock will fully serialize against the stores. - * - * If we observe the new cpu in task_rq_lock, the acquire will - * pair with the WMB to ensure we must then also see migrating. - */ - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) - return rq; - raw_spin_unlock(&rq->lock); - raw_spin_unlock_irqrestore(&p->pi_lock, *flags); - - while (unlikely(task_on_rq_migrating(p))) - cpu_relax(); - } -} - -static void __task_rq_unlock(struct rq *rq) - __releases(rq->lock) -{ - raw_spin_unlock(&rq->lock); -} - -static inline void -task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) - __releases(rq->lock) - __releases(p->pi_lock) -{ - raw_spin_unlock(&rq->lock); - raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -} - -/* * this_rq_lock - lock this runqueue and disable interrupts. */ static struct rq *this_rq_lock(void) --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_time struct sched_dl_entity, dl_timer); struct task_struct *p = dl_task_of(dl_se); + unsigned long flags; struct rq *rq; -again: - rq = task_rq(p); - raw_spin_lock(&rq->lock); - if (rq != task_rq(p)) { - /* Task was moved, retrying. */ - raw_spin_unlock(&rq->lock); - goto again; - } + rq = task_rq_lock(current, &flags); /* * We need to take care of several possible races here: @@ -555,7 +549,7 @@ static enum hrtimer_restart dl_task_time push_dl_task(rq); #endif unlock: - raw_spin_unlock(&rq->lock); + task_rq_unlock(rq, current, &flags); return HRTIMER_NORESTART; } --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1484,6 +1484,82 @@ static inline void double_raw_lock(raw_s } /* + * __task_rq_lock - lock the rq @p resides on. + */ +static inline struct rq *__task_rq_lock(struct task_struct *p) + __acquires(rq->lock) +{ + struct rq *rq; + + lockdep_assert_held(&p->pi_lock); + + for (;;) { + rq = task_rq(p); + raw_spin_lock(&rq->lock); + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) + return rq; + raw_spin_unlock(&rq->lock); + + while (unlikely(task_on_rq_migrating(p))) + cpu_relax(); + } +} + +/* + * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. + */ +static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) + __acquires(p->pi_lock) + __acquires(rq->lock) +{ + struct rq *rq; + + for (;;) { + raw_spin_lock_irqsave(&p->pi_lock, *flags); + rq = task_rq(p); + raw_spin_lock(&rq->lock); + /* + * move_queued_task() task_rq_lock() + * + * ACQUIRE (rq->lock) + * [S] ->on_rq = MIGRATING [L] rq = task_rq() + * WMB (__set_task_cpu()) ACQUIRE (rq->lock); + * [S] ->cpu = new_cpu [L] task_rq() + * [L] ->on_rq + * RELEASE (rq->lock) + * + * If we observe the old cpu in task_rq_lock, the acquire of + * the old rq->lock will fully serialize against the stores. + * + * If we observe the new cpu in task_rq_lock, the acquire will + * pair with the WMB to ensure we must then also see migrating. + */ + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) + return rq; + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&p->pi_lock, *flags); + + while (unlikely(task_on_rq_migrating(p))) + cpu_relax(); + } +} + +static inline void __task_rq_unlock(struct rq *rq) + __releases(rq->lock) +{ + raw_spin_unlock(&rq->lock); +} + +static inline void +task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) + __releases(rq->lock) + __releases(p->pi_lock) +{ + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&p->pi_lock, *flags); +} + +/* * double_rq_lock - safely lock two runqueues * * Note this does not disable interrupts like task_rq_lock,