* [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
@ 2016-04-01 11:00 Xunlei Pang
  2016-04-01 11:38 ` Peter Zijlstra
  0 siblings, 1 reply; 25+ messages in thread
From: Xunlei Pang @ 2016-04-01 11:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Juri Lelli, Ingo Molnar, Steven Rostedt, Xunlei Pang

I found a kernel crash while playing with deadline PI rtmutex.

    BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
    IP: [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
    PGD 232a75067 PUD 230947067 PMD 0
    Oops: 0000 [#1] SMP
    CPU: 1 PID: 10994 Comm: a.out Not tainted

    Call Trace:
    [<ffffffff810cf8aa>] ? enqueue_task_dl+0x2a/0x320
    [<ffffffff810b658c>] enqueue_task+0x2c/0x80
    [<ffffffff810ba763>] activate_task+0x23/0x30
    [<ffffffff810d0ab5>] pull_dl_task+0x1d5/0x260
    [<ffffffff810d0be6>] pre_schedule_dl+0x16/0x20
    [<ffffffff8164e783>] __schedule+0xd3/0x900
    [<ffffffff8164efd9>] schedule+0x29/0x70
    [<ffffffff8165035b>] __rt_mutex_slowlock+0x4b/0xc0
    [<ffffffff81650501>] rt_mutex_slowlock+0xd1/0x190
    [<ffffffff810eeb33>] rt_mutex_timed_lock+0x53/0x60
    [<ffffffff810ecbfc>] futex_lock_pi.isra.18+0x28c/0x390
    [<ffffffff810cfa15>] ? enqueue_task_dl+0x195/0x320
    [<ffffffff810d0bac>] ? prio_changed_dl+0x6c/0x90
    [<ffffffff810ed8b0>] do_futex+0x190/0x5b0
    [<ffffffff810edd50>] SyS_futex+0x80/0x180
    [<ffffffff8165a089>] system_call_fastpath+0x16/0x1b
    RIP  [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30

This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
are only protected by pi_lock when operating pi waiters, while
rt_mutex_get_top_task() will access them with rq lock held but
not holding pi_lock.
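
Roughly, the interleaving looks like this (an illustrative sketch pieced
together from the backtrace and the functions involved, not verbatim
kernel code):

    CPU0 (a waiter, holds p->pi_lock,        CPU1 (pull_dl_task(), holds
          but not p's rq->lock)                    rq->lock, not p->pi_lock)

    rt_mutex_dequeue_pi(p, waiter)
      p->pi_waiters_leftmost = rb_next(...);
      rb_erase(..., &p->pi_waiters);         enqueue_task_dl(rq, p, flags)
                                               rt_mutex_get_top_task(p)
                                                 task_top_pi_waiter(p)->task
                                                 reads the waiter tree while
                                                 CPU0 rewrites it -> NULL deref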

It's hard for rt_mutex_get_top_task() to hold pi_lock, so the
patch ensures rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
lock rq when operating "pi_waiters" and "pi_waiters_leftmost".
We need this iff lock owner has the deadline priority.

Signed-off-by: Xunlei Pang <xlpang@redhat.com>
---
 include/linux/sched/deadline.h |  3 +++
 kernel/locking/rtmutex.c       | 18 ++++++++++++++++++
 kernel/sched/deadline.c        | 17 +++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 9089a2a..3083f6b 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -26,4 +26,7 @@ static inline bool dl_time_before(u64 a, u64 b)
 	return (s64)(a - b) < 0;
 }
 
+extern void *dl_pi_waiters_lock(struct task_struct *p);
+extern void dl_pi_waiters_unlock(void *lockdata);
+
 #endif /* _SCHED_DEADLINE_H */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e74660..0fb247a 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -224,6 +224,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 	struct rb_node *parent = NULL;
 	struct rt_mutex_waiter *entry;
 	int leftmost = 1;
+	void *dl_lockdata = NULL;
 
 	while (*link) {
 		parent = *link;
@@ -236,24 +237,38 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 		}
 	}
 
+	if (dl_task(task))
+		dl_lockdata = dl_pi_waiters_lock(task);
+
 	if (leftmost)
 		task->pi_waiters_leftmost = &waiter->pi_tree_entry;
 
 	rb_link_node(&waiter->pi_tree_entry, parent, link);
 	rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
+
+	if (dl_lockdata)
+		dl_pi_waiters_unlock(dl_lockdata);
 }
 
 static void
 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 {
+	void *dl_lockdata = NULL;
+
 	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
 		return;
 
+	if (dl_task(task))
+		dl_lockdata = dl_pi_waiters_lock(task);
+
 	if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
 		task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
 
 	rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
 	RB_CLEAR_NODE(&waiter->pi_tree_entry);
+
+	if (dl_lockdata)
+		dl_pi_waiters_unlock(dl_lockdata);
 }
 
 /*
@@ -271,6 +286,9 @@ int rt_mutex_getprio(struct task_struct *task)
 		   task->normal_prio);
 }
 
+/*
+ * rq->lock of @task must be held.
+ */
 struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
 {
 	if (likely(!task_has_pi_waiters(task)))
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a3048fa..7b8aa93 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -926,6 +926,23 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 	__dequeue_dl_entity(dl_se);
 }
 
+/*
+ * dl_pi_waiters_lock()/dl_pi_waiters_unlock() are needed by
+ * rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi() to protect
+ * PI waiters accessed by rt_mutex_get_top_task().
+ */
+void *dl_pi_waiters_lock(struct task_struct *p)
+{
+	lockdep_assert_held(&p->pi_lock);
+
+	return __task_rq_lock(p);
+}
+
+void dl_pi_waiters_unlock(void *lockdata)
+{
+	__task_rq_unlock((struct rq *)lockdata);
+}
+
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct task_struct *pi_task = rt_mutex_get_top_task(p);
-- 
1.8.3.1


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-01 11:00 [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks Xunlei Pang
@ 2016-04-01 11:38 ` Peter Zijlstra
  2016-04-01 12:23   ` Xunlei Pang
  0 siblings, 1 reply; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-01 11:38 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt

On Fri, Apr 01, 2016 at 07:00:18PM +0800, Xunlei Pang wrote:
> I found a kernel crash while playing with deadline PI rtmutex.
> 
>     BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
>     IP: [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
>     PGD 232a75067 PUD 230947067 PMD 0
>     Oops: 0000 [#1] SMP
>     CPU: 1 PID: 10994 Comm: a.out Not tainted
> 
>     Call Trace:
>     [<ffffffff810cf8aa>] ? enqueue_task_dl+0x2a/0x320
>     [<ffffffff810b658c>] enqueue_task+0x2c/0x80
>     [<ffffffff810ba763>] activate_task+0x23/0x30
>     [<ffffffff810d0ab5>] pull_dl_task+0x1d5/0x260
>     [<ffffffff810d0be6>] pre_schedule_dl+0x16/0x20
>     [<ffffffff8164e783>] __schedule+0xd3/0x900
>     [<ffffffff8164efd9>] schedule+0x29/0x70
>     [<ffffffff8165035b>] __rt_mutex_slowlock+0x4b/0xc0
>     [<ffffffff81650501>] rt_mutex_slowlock+0xd1/0x190
>     [<ffffffff810eeb33>] rt_mutex_timed_lock+0x53/0x60
>     [<ffffffff810ecbfc>] futex_lock_pi.isra.18+0x28c/0x390
>     [<ffffffff810cfa15>] ? enqueue_task_dl+0x195/0x320
>     [<ffffffff810d0bac>] ? prio_changed_dl+0x6c/0x90
>     [<ffffffff810ed8b0>] do_futex+0x190/0x5b0
>     [<ffffffff810edd50>] SyS_futex+0x80/0x180
>     [<ffffffff8165a089>] system_call_fastpath+0x16/0x1b
>     RIP  [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
> 
> This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
> are only protected by pi_lock when operating pi waiters, while
> rt_mutex_get_top_task() will access them with rq lock held but
> not holding pi_lock.
> 
> It's hard for rt_mutex_get_top_task() to hold pi_lock, so the
> patch ensures rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
> lock rq when operating "pi_waiters" and "pi_waiters_leftmost".
> We need this iff lock owner has the deadline priority.

How is this deadline specific? Those functions you modify are
deadline/rt agnostic.


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-01 11:38 ` Peter Zijlstra
@ 2016-04-01 12:23   ` Xunlei Pang
  2016-04-01 13:12     ` Peter Zijlstra
  0 siblings, 1 reply; 25+ messages in thread
From: Xunlei Pang @ 2016-04-01 12:23 UTC (permalink / raw)
  To: Peter Zijlstra, Xunlei Pang
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt

On 2016/04/01 at 19:38, Peter Zijlstra wrote:
> On Fri, Apr 01, 2016 at 07:00:18PM +0800, Xunlei Pang wrote:
>> I found a kernel crash while playing with deadline PI rtmutex.
>>
>>     BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
>>     IP: [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
>>     PGD 232a75067 PUD 230947067 PMD 0
>>     Oops: 0000 [#1] SMP
>>     CPU: 1 PID: 10994 Comm: a.out Not tainted
>>
>>     Call Trace:
>>     [<ffffffff810cf8aa>] ? enqueue_task_dl+0x2a/0x320
>>     [<ffffffff810b658c>] enqueue_task+0x2c/0x80
>>     [<ffffffff810ba763>] activate_task+0x23/0x30
>>     [<ffffffff810d0ab5>] pull_dl_task+0x1d5/0x260
>>     [<ffffffff810d0be6>] pre_schedule_dl+0x16/0x20
>>     [<ffffffff8164e783>] __schedule+0xd3/0x900
>>     [<ffffffff8164efd9>] schedule+0x29/0x70
>>     [<ffffffff8165035b>] __rt_mutex_slowlock+0x4b/0xc0
>>     [<ffffffff81650501>] rt_mutex_slowlock+0xd1/0x190
>>     [<ffffffff810eeb33>] rt_mutex_timed_lock+0x53/0x60
>>     [<ffffffff810ecbfc>] futex_lock_pi.isra.18+0x28c/0x390
>>     [<ffffffff810cfa15>] ? enqueue_task_dl+0x195/0x320
>>     [<ffffffff810d0bac>] ? prio_changed_dl+0x6c/0x90
>>     [<ffffffff810ed8b0>] do_futex+0x190/0x5b0
>>     [<ffffffff810edd50>] SyS_futex+0x80/0x180
>>     [<ffffffff8165a089>] system_call_fastpath+0x16/0x1b
>>     RIP  [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
>>
>> This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
>> are only protected by pi_lock when operating pi waiters, while
>> rt_mutex_get_top_task() will access them with rq lock held but
>> not holding pi_lock.
>>
>> It's hard for rt_mutex_get_top_task() to hold pi_lock, so the
>> patch ensures rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
>> lock rq when operating "pi_waiters" and "pi_waiters_leftmost".
>> We need this iff lock owner has the deadline priority.
> How is this deadline specific, those functions you modify are
> deadline/rt agnostic.

I checked the code: currently only deadline accesses pi_waiters/pi_waiters_leftmost
without pi_lock held, via rt_mutex_get_top_task(); all other cases have pi_lock held.

Hence the dl_task() condition.

Regards,
Xunlei


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-01 12:23   ` Xunlei Pang
@ 2016-04-01 13:12     ` Peter Zijlstra
  2016-04-01 13:34       ` Xunlei Pang
  0 siblings, 1 reply; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-01 13:12 UTC (permalink / raw)
  To: xlpang, Xunlei Pang; +Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt



On 1 April 2016 14:23:58 CEST, Xunlei Pang <xpang@redhat.com> wrote:

>>> We need this iff lock owner has the deadline priority.
>> How is this deadline specific, those functions you modify are
>> deadline/rt agnostic.
>
>I checked the code, currently only deadline accesses the
>pi_waiters/pi_waiters_leftmost
>without pi_lock held via rt_mutex_get_top_task(), other cases all have
>pi_lock held.
>
>So adding the condition.

How does that not suggest fixing the deadline code?
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-01 13:12     ` Peter Zijlstra
@ 2016-04-01 13:34       ` Xunlei Pang
  2016-04-01 21:51         ` Peter Zijlstra
  0 siblings, 1 reply; 25+ messages in thread
From: Xunlei Pang @ 2016-04-01 13:34 UTC (permalink / raw)
  To: Peter Zijlstra, xlpang
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt

On 2016/04/01 at 21:12, Peter Zijlstra wrote:
>
> On 1 April 2016 14:23:58 CEST, Xunlei Pang <xpang@redhat.com> wrote:
>
>>>> We need this iff lock owner has the deadline priority.
>>> How is this deadline specific, those functions you modify are
>>> deadline/rt agnostic.
>> I checked the code, currently only deadline accesses the
>> pi_waiters/pi_waiters_leftmost
>> without pi_lock held via rt_mutex_get_top_task(), other cases all have
>> pi_lock held.
>>
>> So adding the condition.
> How does that not suggest fixing the deadline code?

I did try that at first, but found it very hard when processing
pull_dl_task() (push_dl_task() can crash as well), for example:

   double_lock_balance(this_rq, src_rq);
   p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
   /*
    * For each @p we would have to hold its pi_lock, but taking it
    * once the rq is already locked can deadlock.
    */

Ditto for enqueue_task_dl()->rt_mutex_get_top_task(), as the rq is already locked.

If we unlock the rq first and then take pi_lock, that may cause other problems
due to having dropped the rq lock.
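
The underlying trouble is the lock ordering: the established nesting is
p->pi_lock outside rq->lock, so the pull path would have to invert it.
An illustrative sketch only:

   /* Normal nesting, e.g. task_rq_lock() / rt_mutex_setprio(): */
   raw_spin_lock_irqsave(&p->pi_lock, flags);
   rq = __task_rq_lock(p);                    /* pi_lock, then rq->lock */

   /* What the pull path would need to do: */
   double_lock_balance(this_rq, src_rq);      /* rq locks already held */
   p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
   raw_spin_lock(&p->pi_lock);                /* rq->lock, then pi_lock: inverted */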

Any better ideas are welcome.

Regards,
Xunlei


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-01 13:34       ` Xunlei Pang
@ 2016-04-01 21:51         ` Peter Zijlstra
  2016-04-02 10:19           ` Xunlei Pang
  2016-04-05  8:38           ` Xunlei Pang
  0 siblings, 2 replies; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-01 21:51 UTC (permalink / raw)
  To: xlpang
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt, Thomas Gleixner

On Fri, Apr 01, 2016 at 09:34:24PM +0800, Xunlei Pang wrote:

> >> I checked the code, currently only deadline accesses the
> >> pi_waiters/pi_waiters_leftmost
> >> without pi_lock held via rt_mutex_get_top_task(), other cases all have
> >> pi_lock held.

> Any better ideas is welcome.

Something like the below _might_ work; but it's late and I haven't looked
at the PI code in a while. This basically caches a pointer to the top
waiter task in the running task_struct, under pi_lock and rq->lock, and
therefore we can use it with only rq->lock held.

Since the task is blocked, and cannot unblock without taking itself from
the block chain -- which would cause rt_mutex_setprio() to set another
top waiter task, the lifetime rules should be good.

Having this top waiter pointer around might also be useful to further
implement bandwidth inheritance or such, but I've not thought about that
too much.

Lots of code deleted because we move the entire task->prio mapping muck
into the scheduler, because we now pass a pi_task, not a prio.

---
 include/linux/sched.h    |  1 +
 include/linux/sched/rt.h | 20 +-------------------
 kernel/locking/rtmutex.c | 45 +++++----------------------------------------
 kernel/sched/core.c      | 34 +++++++++++++++++++++++-----------
 kernel/sched/deadline.c  |  2 +-
 5 files changed, 31 insertions(+), 71 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52c4847b05e2..30169f38cb24 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1615,6 +1615,7 @@ struct task_struct {
 
 	/* Protection of the PI data structures: */
 	raw_spinlock_t pi_lock;
+	struct task_struct *pi_task;
 
 	struct wake_q_node wake_q;
 
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index a30b172df6e1..0a8f071b16e3 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -16,31 +16,13 @@ static inline int rt_task(struct task_struct *p)
 }
 
 #ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
-extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
+extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
 {
 	return tsk->pi_blocked_on != NULL;
 }
 #else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
-	return p->normal_prio;
-}
-
-static inline int rt_mutex_get_effective_prio(struct task_struct *task,
-					      int newprio)
-{
-	return newprio;
-}
-
-static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
-{
-	return NULL;
-}
 # define rt_mutex_adjust_pi(p)		do { } while (0)
 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
 {
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e746607abe5..13b6b5922d3c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -257,53 +257,18 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 }
 
 /*
- * Calculate task priority from the waiter tree priority
- *
- * Return task->normal_prio when the waiter tree is empty or when
- * the waiter is not allowed to do priority boosting
- */
-int rt_mutex_getprio(struct task_struct *task)
-{
-	if (likely(!task_has_pi_waiters(task)))
-		return task->normal_prio;
-
-	return min(task_top_pi_waiter(task)->prio,
-		   task->normal_prio);
-}
-
-struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
-{
-	if (likely(!task_has_pi_waiters(task)))
-		return NULL;
-
-	return task_top_pi_waiter(task)->task;
-}
-
-/*
- * Called by sched_setscheduler() to get the priority which will be
- * effective after the change.
- */
-int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
-{
-	if (!task_has_pi_waiters(task))
-		return newprio;
-
-	if (task_top_pi_waiter(task)->task->prio <= newprio)
-		return task_top_pi_waiter(task)->task->prio;
-	return newprio;
-}
-
-/*
  * Adjust the priority of a task, after its pi_waiters got modified.
  *
  * This can be both boosting and unboosting. task->pi_lock must be held.
  */
 static void __rt_mutex_adjust_prio(struct task_struct *task)
 {
-	int prio = rt_mutex_getprio(task);
+	struct task_struct *pi_task = task;
+
+	if (unlikely(task_has_pi_waiters(task)))
+		pi_task = task_top_pi_waiter(task)->task;
 
-	if (task->prio != prio || dl_prio(prio))
-		rt_mutex_setprio(task, prio);
+	rt_mutex_setprio(task, pi_task);
 }
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8b489fcac37b..7d7e3a0eaeb0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3392,7 +3392,7 @@ EXPORT_SYMBOL(default_wake_function);
 /*
  * rt_mutex_setprio - set the current priority of a task
  * @p: task
- * @prio: prio value (kernel-internal form)
+ * @pi_task: top waiter, donating state
  *
  * This function changes the 'effective' priority of a task. It does
  * not touch ->normal_prio like __setscheduler().
@@ -3400,13 +3400,20 @@ EXPORT_SYMBOL(default_wake_function);
  * Used by the rt_mutex code to implement priority inheritance
  * logic. Call site only calls if the priority of the task changed.
  */
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 {
-	int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
-	struct rq *rq;
+	int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
 	const struct sched_class *prev_class;
+	struct rq *rq;
 
-	BUG_ON(prio > MAX_PRIO);
+	/*
+	 * For FIFO/RR we simply donate prio; for DL things are
+	 * more interesting.
+	 */
+	/* XXX used to be waiter->prio, not waiter->task->prio */
+	prio = min(pi_task->prio, p->normal_prio);
+	if (p->prio == prio && !dl_prio(prio))
+		return;
 
 	rq = __task_rq_lock(p);
 
@@ -3442,6 +3449,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	if (running)
 		put_prev_task(rq, p);
 
+	if (pi_task == p)
+		pi_task = NULL;
+	p->pi_task = pi_task;
+
 	/*
 	 * Boosting condition are:
 	 * 1. -rt task is running and holds mutex A
@@ -3452,7 +3463,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	 *          running task
 	 */
 	if (dl_prio(prio)) {
-		struct task_struct *pi_task = rt_mutex_get_top_task(p);
 		if (!dl_prio(p->normal_prio) ||
 		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
 			p->dl.dl_boosted = 1;
@@ -3727,10 +3737,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 	 * Keep a potential priority boosting if called from
 	 * sched_setscheduler().
 	 */
-	if (keep_boost)
-		p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
-	else
-		p->prio = normal_prio(p);
+	p->prio = normal_prio(p);
+	if (keep_boost && p->pi_task)
+		p->prio = min(p->prio, p->pi_task->prio);
 
 	if (dl_prio(p->prio))
 		p->sched_class = &dl_sched_class;
@@ -4017,7 +4026,10 @@ static int __sched_setscheduler(struct task_struct *p,
 		 * the runqueue. This will be done when the task deboost
 		 * itself.
 		 */
-		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+		new_effective_prio = newprio;
+		if (p->pi_task)
+			new_effective_prio = min(new_effective_prio, p->pi_task->prio);
+
 		if (new_effective_prio == oldprio)
 			queue_flags &= ~DEQUEUE_MOVE;
 	}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index affd97ec9f65..6c5aa4612eb6 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -932,7 +932,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
-	struct task_struct *pi_task = rt_mutex_get_top_task(p);
+	struct task_struct *pi_task = p->pi_task;
 	struct sched_dl_entity *pi_se = &p->dl;
 
 	/*


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-01 21:51         ` Peter Zijlstra
@ 2016-04-02 10:19           ` Xunlei Pang
  2016-04-05  8:38           ` Xunlei Pang
  1 sibling, 0 replies; 25+ messages in thread
From: Xunlei Pang @ 2016-04-02 10:19 UTC (permalink / raw)
  To: Peter Zijlstra, xlpang
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt, Thomas Gleixner

On 2016/04/02 at 05:51, Peter Zijlstra wrote:
> On Fri, Apr 01, 2016 at 09:34:24PM +0800, Xunlei Pang wrote:
>
>>>> I checked the code, currently only deadline accesses the
>>>> pi_waiters/pi_waiters_leftmost
>>>> without pi_lock held via rt_mutex_get_top_task(), other cases all have
>>>> pi_lock held.
>> Any better ideas is welcome.
> Something like the below _might_ work; but its late and I haven't looked
> at the PI code in a while. This basically caches a pointer to the top
> waiter task in the running task_struct, under pi_lock and rq->lock, and
> therefore we can use it with only rq->lock held.
>
> Since the task is blocked, and cannot unblock without taking itself from
> the block chain -- which would cause rt_mutex_setprio() to set another
> top waiter task, the lifetime rules should be good.
>
> Having this top waiter pointer around might also be useful to further
> implement bandwidth inheritance or such, but I've not thought about that
> too much.
>
> Lots of code deleted because we move the entire task->prio mapping muck
> into the scheduler, because we now pass a pi_task, not a prio.

I'll try to understand your patch, and do a test later. Thanks!

Regards,
Xunlei

> ---
>  include/linux/sched.h    |  1 +
>  include/linux/sched/rt.h | 20 +-------------------
>  kernel/locking/rtmutex.c | 45 +++++----------------------------------------
>  kernel/sched/core.c      | 34 +++++++++++++++++++++++-----------
>  kernel/sched/deadline.c  |  2 +-
>  5 files changed, 31 insertions(+), 71 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 52c4847b05e2..30169f38cb24 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1615,6 +1615,7 @@ struct task_struct {
>  
>  	/* Protection of the PI data structures: */
>  	raw_spinlock_t pi_lock;
> +	struct task_struct *pi_task;
>  
>  	struct wake_q_node wake_q;
>  
> diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
> index a30b172df6e1..0a8f071b16e3 100644
> --- a/include/linux/sched/rt.h
> +++ b/include/linux/sched/rt.h
> @@ -16,31 +16,13 @@ static inline int rt_task(struct task_struct *p)
>  }
>  
>  #ifdef CONFIG_RT_MUTEXES
> -extern int rt_mutex_getprio(struct task_struct *p);
> -extern void rt_mutex_setprio(struct task_struct *p, int prio);
> -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
> -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
> +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
>  extern void rt_mutex_adjust_pi(struct task_struct *p);
>  static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
>  {
>  	return tsk->pi_blocked_on != NULL;
>  }
>  #else
> -static inline int rt_mutex_getprio(struct task_struct *p)
> -{
> -	return p->normal_prio;
> -}
> -
> -static inline int rt_mutex_get_effective_prio(struct task_struct *task,
> -					      int newprio)
> -{
> -	return newprio;
> -}
> -
> -static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
> -{
> -	return NULL;
> -}
>  # define rt_mutex_adjust_pi(p)		do { } while (0)
>  static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
>  {
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 3e746607abe5..13b6b5922d3c 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -257,53 +257,18 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
>  }
>  
>  /*
> - * Calculate task priority from the waiter tree priority
> - *
> - * Return task->normal_prio when the waiter tree is empty or when
> - * the waiter is not allowed to do priority boosting
> - */
> -int rt_mutex_getprio(struct task_struct *task)
> -{
> -	if (likely(!task_has_pi_waiters(task)))
> -		return task->normal_prio;
> -
> -	return min(task_top_pi_waiter(task)->prio,
> -		   task->normal_prio);
> -}
> -
> -struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
> -{
> -	if (likely(!task_has_pi_waiters(task)))
> -		return NULL;
> -
> -	return task_top_pi_waiter(task)->task;
> -}
> -
> -/*
> - * Called by sched_setscheduler() to get the priority which will be
> - * effective after the change.
> - */
> -int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
> -{
> -	if (!task_has_pi_waiters(task))
> -		return newprio;
> -
> -	if (task_top_pi_waiter(task)->task->prio <= newprio)
> -		return task_top_pi_waiter(task)->task->prio;
> -	return newprio;
> -}
> -
> -/*
>   * Adjust the priority of a task, after its pi_waiters got modified.
>   *
>   * This can be both boosting and unboosting. task->pi_lock must be held.
>   */
>  static void __rt_mutex_adjust_prio(struct task_struct *task)
>  {
> -	int prio = rt_mutex_getprio(task);
> +	struct task_struct *pi_task = task;
> +
> +	if (unlikely(task_has_pi_waiters(task)))
> +		pi_task = task_top_pi_waiter(task)->task;
>  
> -	if (task->prio != prio || dl_prio(prio))
> -		rt_mutex_setprio(task, prio);
> +	rt_mutex_setprio(task, pi_task);
>  }
>  
>  /*
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 8b489fcac37b..7d7e3a0eaeb0 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3392,7 +3392,7 @@ EXPORT_SYMBOL(default_wake_function);
>  /*
>   * rt_mutex_setprio - set the current priority of a task
>   * @p: task
> - * @prio: prio value (kernel-internal form)
> + * @pi_task: top waiter, donating state
>   *
>   * This function changes the 'effective' priority of a task. It does
>   * not touch ->normal_prio like __setscheduler().
> @@ -3400,13 +3400,20 @@ EXPORT_SYMBOL(default_wake_function);
>   * Used by the rt_mutex code to implement priority inheritance
>   * logic. Call site only calls if the priority of the task changed.
>   */
> -void rt_mutex_setprio(struct task_struct *p, int prio)
> +void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
>  {
> -	int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
> -	struct rq *rq;
> +	int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
>  	const struct sched_class *prev_class;
> +	struct rq *rq;
>  
> -	BUG_ON(prio > MAX_PRIO);
> +	/*
> +	 * For FIFO/RR we simply donate prio; for DL things are
> +	 * more interesting.
> +	 */
> +	/* XXX used to be waiter->prio, not waiter->task->prio */
> +	prio = min(pi_task->prio, p->normal_prio);
> +	if (p->prio == prio && !dl_prio(prio))
> +		return;
>  
>  	rq = __task_rq_lock(p);
>  
> @@ -3442,6 +3449,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
>  	if (running)
>  		put_prev_task(rq, p);
>  
> +	if (pi_task == p)
> +		pi_task = NULL;
> +	p->pi_task = pi_task;
> +
>  	/*
>  	 * Boosting condition are:
>  	 * 1. -rt task is running and holds mutex A
> @@ -3452,7 +3463,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
>  	 *          running task
>  	 */
>  	if (dl_prio(prio)) {
> -		struct task_struct *pi_task = rt_mutex_get_top_task(p);
>  		if (!dl_prio(p->normal_prio) ||
>  		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
>  			p->dl.dl_boosted = 1;
> @@ -3727,10 +3737,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
>  	 * Keep a potential priority boosting if called from
>  	 * sched_setscheduler().
>  	 */
> -	if (keep_boost)
> -		p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
> -	else
> -		p->prio = normal_prio(p);
> +	p->prio = normal_prio(p);
> +	if (keep_boost && p->pi_task)
> +		p->prio = min(p->prio, p->pi_task->prio);
>  
>  	if (dl_prio(p->prio))
>  		p->sched_class = &dl_sched_class;
> @@ -4017,7 +4026,10 @@ static int __sched_setscheduler(struct task_struct *p,
>  		 * the runqueue. This will be done when the task deboost
>  		 * itself.
>  		 */
> -		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
> +		new_effective_prio = newprio;
> +		if (p->pi_task)
> +			new_effective_prio = min(new_effective_prio, p->pi_task->prio);
> +
>  		if (new_effective_prio == oldprio)
>  			queue_flags &= ~DEQUEUE_MOVE;
>  	}
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index affd97ec9f65..6c5aa4612eb6 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -932,7 +932,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
>  
>  static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
>  {
> -	struct task_struct *pi_task = rt_mutex_get_top_task(p);
> +	struct task_struct *pi_task = p->pi_task;
>  	struct sched_dl_entity *pi_se = &p->dl;
>  
>  	/*


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-01 21:51         ` Peter Zijlstra
  2016-04-02 10:19           ` Xunlei Pang
@ 2016-04-05  8:38           ` Xunlei Pang
  2016-04-05  9:19             ` Peter Zijlstra
  1 sibling, 1 reply; 25+ messages in thread
From: Xunlei Pang @ 2016-04-05  8:38 UTC (permalink / raw)
  To: Peter Zijlstra, xlpang
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt, Thomas Gleixner

On 2016/04/02 at 05:51, Peter Zijlstra wrote:
> On Fri, Apr 01, 2016 at 09:34:24PM +0800, Xunlei Pang wrote:
>
>>>> I checked the code, currently only deadline accesses the
>>>> pi_waiters/pi_waiters_leftmost
>>>> without pi_lock held via rt_mutex_get_top_task(), other cases all have
>>>> pi_lock held.
>> Any better ideas is welcome.
> Something like the below _might_ work; but its late and I haven't looked
> at the PI code in a while. This basically caches a pointer to the top
> waiter task in the running task_struct, under pi_lock and rq->lock, and
> therefore we can use it with only rq->lock held.
>
> Since the task is blocked, and cannot unblock without taking itself from
> the block chain -- which would cause rt_mutex_setprio() to set another
> top waiter task, the lifetime rules should be good.

In rt_mutex_slowunlock(), we release pi_lock and wait_lock first, then
wake up the top waiter, and only then call rt_mutex_adjust_prio(). So there
is a small window, with no lock held and IRQs enabled, between the top
waiter wakeup and rt_mutex_adjust_prio(), which can cause problems.

For example, before rt_mutex_adjust_prio() gets to update the cached pointer:
if current is preempted and the woken top waiter exits, then once current is
back it may enter enqueue_task_dl() before it reaches rt_mutex_adjust_prio()
(which is where the cached pointer would be updated), so it will access a
stale cached pointer.
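
As a rough timeline (illustrative only, paraphrasing the current
rt_mutex_fastunlock() flow together with the proposed cached pointer):

    current (deboosting lock owner)          woken top waiter

    rt_mutex_fastunlock()
      deboost = slowfn(lock, &wake_q);
      wake_up_q(&wake_q);   ------------->   runs, drops its waiter state,
                                             possibly even exits
      <preempted; no lock held>
      ...
      <re-enqueued for whatever reason,
       e.g. a later migration/wakeup>
      enqueue_task_dl()
        uses the cached top-waiter pointer,
        which is stale by now
      rt_mutex_adjust_prio(current)
        only here would the cached pointer
        have been updated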

Regards,
Xunlei

> Having this top waiter pointer around might also be useful to further
> implement bandwidth inheritance or such, but I've not thought about that
> too much.
>
> Lots of code deleted because we move the entire task->prio mapping muck
> into the scheduler, because we now pass a pi_task, not a prio.
>
> ---
>  include/linux/sched.h    |  1 +
>  include/linux/sched/rt.h | 20 +-------------------
>  kernel/locking/rtmutex.c | 45 +++++----------------------------------------
>  kernel/sched/core.c      | 34 +++++++++++++++++++++++-----------
>  kernel/sched/deadline.c  |  2 +-
>  5 files changed, 31 insertions(+), 71 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 52c4847b05e2..30169f38cb24 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1615,6 +1615,7 @@ struct task_struct {
>  
>  	/* Protection of the PI data structures: */
>  	raw_spinlock_t pi_lock;
> +	struct task_struct *pi_task;
>  
>  	struct wake_q_node wake_q;
>  
> diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
> index a30b172df6e1..0a8f071b16e3 100644
> --- a/include/linux/sched/rt.h
> +++ b/include/linux/sched/rt.h
> @@ -16,31 +16,13 @@ static inline int rt_task(struct task_struct *p)
>  }
>  
>  #ifdef CONFIG_RT_MUTEXES
> -extern int rt_mutex_getprio(struct task_struct *p);
> -extern void rt_mutex_setprio(struct task_struct *p, int prio);
> -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
> -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
> +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
>  extern void rt_mutex_adjust_pi(struct task_struct *p);
>  static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
>  {
>  	return tsk->pi_blocked_on != NULL;
>  }
>  #else
> -static inline int rt_mutex_getprio(struct task_struct *p)
> -{
> -	return p->normal_prio;
> -}
> -
> -static inline int rt_mutex_get_effective_prio(struct task_struct *task,
> -					      int newprio)
> -{
> -	return newprio;
> -}
> -
> -static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
> -{
> -	return NULL;
> -}
>  # define rt_mutex_adjust_pi(p)		do { } while (0)
>  static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
>  {
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 3e746607abe5..13b6b5922d3c 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -257,53 +257,18 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
>  }
>  
>  /*
> - * Calculate task priority from the waiter tree priority
> - *
> - * Return task->normal_prio when the waiter tree is empty or when
> - * the waiter is not allowed to do priority boosting
> - */
> -int rt_mutex_getprio(struct task_struct *task)
> -{
> -	if (likely(!task_has_pi_waiters(task)))
> -		return task->normal_prio;
> -
> -	return min(task_top_pi_waiter(task)->prio,
> -		   task->normal_prio);
> -}
> -
> -struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
> -{
> -	if (likely(!task_has_pi_waiters(task)))
> -		return NULL;
> -
> -	return task_top_pi_waiter(task)->task;
> -}
> -
> -/*
> - * Called by sched_setscheduler() to get the priority which will be
> - * effective after the change.
> - */
> -int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
> -{
> -	if (!task_has_pi_waiters(task))
> -		return newprio;
> -
> -	if (task_top_pi_waiter(task)->task->prio <= newprio)
> -		return task_top_pi_waiter(task)->task->prio;
> -	return newprio;
> -}
> -
> -/*
>   * Adjust the priority of a task, after its pi_waiters got modified.
>   *
>   * This can be both boosting and unboosting. task->pi_lock must be held.
>   */
>  static void __rt_mutex_adjust_prio(struct task_struct *task)
>  {
> -	int prio = rt_mutex_getprio(task);
> +	struct task_struct *pi_task = task;
> +
> +	if (unlikely(task_has_pi_waiters(task)))
> +		pi_task = task_top_pi_waiter(task)->task;
>  
> -	if (task->prio != prio || dl_prio(prio))
> -		rt_mutex_setprio(task, prio);
> +	rt_mutex_setprio(task, pi_task);
>  }
>  
>  /*
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 8b489fcac37b..7d7e3a0eaeb0 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3392,7 +3392,7 @@ EXPORT_SYMBOL(default_wake_function);
>  /*
>   * rt_mutex_setprio - set the current priority of a task
>   * @p: task
> - * @prio: prio value (kernel-internal form)
> + * @pi_task: top waiter, donating state
>   *
>   * This function changes the 'effective' priority of a task. It does
>   * not touch ->normal_prio like __setscheduler().
> @@ -3400,13 +3400,20 @@ EXPORT_SYMBOL(default_wake_function);
>   * Used by the rt_mutex code to implement priority inheritance
>   * logic. Call site only calls if the priority of the task changed.
>   */
> -void rt_mutex_setprio(struct task_struct *p, int prio)
> +void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
>  {
> -	int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
> -	struct rq *rq;
> +	int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
>  	const struct sched_class *prev_class;
> +	struct rq *rq;
>  
> -	BUG_ON(prio > MAX_PRIO);
> +	/*
> +	 * For FIFO/RR we simply donate prio; for DL things are
> +	 * more interesting.
> +	 */
> +	/* XXX used to be waiter->prio, not waiter->task->prio */
> +	prio = min(pi_task->prio, p->normal_prio);
> +	if (p->prio == prio && !dl_prio(prio))
> +		return;
>  
>  	rq = __task_rq_lock(p);
>  
> @@ -3442,6 +3449,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
>  	if (running)
>  		put_prev_task(rq, p);
>  
> +	if (pi_task == p)
> +		pi_task = NULL;
> +	p->pi_task = pi_task;
> +
>  	/*
>  	 * Boosting condition are:
>  	 * 1. -rt task is running and holds mutex A
> @@ -3452,7 +3463,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
>  	 *          running task
>  	 */
>  	if (dl_prio(prio)) {
> -		struct task_struct *pi_task = rt_mutex_get_top_task(p);
>  		if (!dl_prio(p->normal_prio) ||
>  		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
>  			p->dl.dl_boosted = 1;
> @@ -3727,10 +3737,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
>  	 * Keep a potential priority boosting if called from
>  	 * sched_setscheduler().
>  	 */
> -	if (keep_boost)
> -		p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
> -	else
> -		p->prio = normal_prio(p);
> +	p->prio = normal_prio(p);
> +	if (keep_boost && p->pi_task)
> +		p->prio = min(p->prio, p->pi_task->prio);
>  
>  	if (dl_prio(p->prio))
>  		p->sched_class = &dl_sched_class;
> @@ -4017,7 +4026,10 @@ static int __sched_setscheduler(struct task_struct *p,
>  		 * the runqueue. This will be done when the task deboost
>  		 * itself.
>  		 */
> -		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
> +		new_effective_prio = newprio;
> +		if (p->pi_task)
> +			new_effective_prio = min(new_effective_prio, p->pi_task->prio);
> +
>  		if (new_effective_prio == oldprio)
>  			queue_flags &= ~DEQUEUE_MOVE;
>  	}
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index affd97ec9f65..6c5aa4612eb6 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -932,7 +932,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
>  
>  static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
>  {
> -	struct task_struct *pi_task = rt_mutex_get_top_task(p);
> +	struct task_struct *pi_task = p->pi_task;
>  	struct sched_dl_entity *pi_se = &p->dl;
>  
>  	/*


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-05  8:38           ` Xunlei Pang
@ 2016-04-05  9:19             ` Peter Zijlstra
  2016-04-05  9:29               ` Peter Zijlstra
  0 siblings, 1 reply; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-05  9:19 UTC (permalink / raw)
  To: xlpang
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt, Thomas Gleixner

On Tue, Apr 05, 2016 at 04:38:12PM +0800, Xunlei Pang wrote:
> On 2016/04/02 at 05:51, Peter Zijlstra wrote:
> > On Fri, Apr 01, 2016 at 09:34:24PM +0800, Xunlei Pang wrote:
> >
> >>>> I checked the code, currently only deadline accesses the
> >>>> pi_waiters/pi_waiters_leftmost
> >>>> without pi_lock held via rt_mutex_get_top_task(), other cases all have
> >>>> pi_lock held.
> >> Any better ideas is welcome.
> > Something like the below _might_ work; but its late and I haven't looked
> > at the PI code in a while. This basically caches a pointer to the top
> > waiter task in the running task_struct, under pi_lock and rq->lock, and
> > therefore we can use it with only rq->lock held.
> >
> > Since the task is blocked, and cannot unblock without taking itself from
> > the block chain -- which would cause rt_mutex_setprio() to set another
> > top waiter task, the lifetime rules should be good.
> 
> In rt_mutex_slowunlock(), we release pi_lock and and wait_lock first, then
> wake up the top waiter, then call rt_mutex_adjust_prio(), so there is a small
> window without any lock or irq disabled between the top waiter wake up
> and rt_mutex_adjust_prio(), which can cause problems.

That is rt_mutex_fastunlock()'s:

	bool deboost = slowfn(lock, &wake_q); /* -> rt_mutex_slowunlock() */

	wake_up_q(&wake_q);

	if (deboost)
		rt_mutex_adjust_prio(current);


(and the IRQs-enabled part is irrelevant; SMP can race regardless)

> For example, before calling rt_mutex_adjust_prio() to adjust the cached pointer,
> if current is preempted and the waken top waiter exited, after that, the task is
> back, and it may enter enqueue_task_dl() before entering rt_mutex_adjust_prio(),
> where the cached pointer is updated, so it will access a stale cached pointer.

Hmm, so I would argue that that is a bug in any case. It's an effective
priority 'leak'; we should deboost before letting the booster run again.

But it looks like a simple fix: simply call wake_up_q() after the
deboost. The wake_q has a reference on the task so it cannot go away,
which ensures any dereferences from within the DL code must still be
valid.

Or did I miss something (again) ? :-)

---
 kernel/locking/rtmutex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e746607abe5..36eb232bd29f 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1390,11 +1390,11 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 	} else {
 		bool deboost = slowfn(lock, &wake_q);
 
-		wake_up_q(&wake_q);
-
 		/* Undo pi boosting if necessary: */
 		if (deboost)
 			rt_mutex_adjust_prio(current);
+
+		wake_up_q(&wake_q);
 	}
 }
 


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-05  9:19             ` Peter Zijlstra
@ 2016-04-05  9:29               ` Peter Zijlstra
  2016-04-05 10:48                 ` Xunlei Pang
  2016-04-08 16:25                 ` Steven Rostedt
  0 siblings, 2 replies; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-05  9:29 UTC (permalink / raw)
  To: xlpang
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt, Thomas Gleixner

On Tue, Apr 05, 2016 at 11:19:54AM +0200, Peter Zijlstra wrote:
> Or did I miss something (again) ? :-)
> 
> ---
>  kernel/locking/rtmutex.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 3e746607abe5..36eb232bd29f 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1390,11 +1390,11 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
>  	} else {
>  		bool deboost = slowfn(lock, &wake_q);
>  
> -		wake_up_q(&wake_q);
> -
>  		/* Undo pi boosting if necessary: */
>  		if (deboost)
>  			rt_mutex_adjust_prio(current);
> +
> +		wake_up_q(&wake_q);
>  	}
>  }

So one potential issue with this -- and this might be the reason this code
is the way it is -- is that the moment we de-boost we can get preempted,
before having had a chance to wake the higher prio task, getting
ourselves into a prio-inversion.

But again, that should be fairly simple to fix.

--
 kernel/locking/rtmutex.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e746607abe5..1896baf28e9c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1390,11 +1390,21 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 	} else {
 		bool deboost = slowfn(lock, &wake_q);
 
-		wake_up_q(&wake_q);
-
-		/* Undo pi boosting if necessary: */
+		/*
+		 * Undo pi boosting (if necessary) and wake top waiter.
+		 *
+		 * We should deboost before waking the high-prio task such that
+		 * we don't run two tasks with the 'same' state. This however
+		 * can lead to prio-inversion if we would get preempted after
+		 * the deboost but before waking our high-prio task, hence the
+		 * preempt_disable.
+		 */
+		preempt_disable();
 		if (deboost)
 			rt_mutex_adjust_prio(current);
+
+		wake_up_q(&wake_q);
+		preempt_enable();
 	}
 }
 


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-05  9:29               ` Peter Zijlstra
@ 2016-04-05 10:48                 ` Xunlei Pang
  2016-04-05 11:32                   ` Peter Zijlstra
  2016-04-08 16:25                 ` Steven Rostedt
  1 sibling, 1 reply; 25+ messages in thread
From: Xunlei Pang @ 2016-04-05 10:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt, Thomas Gleixner

On 2016/04/05 at 17:29, Peter Zijlstra wrote:
> On Tue, Apr 05, 2016 at 11:19:54AM +0200, Peter Zijlstra wrote:
>> Or did I miss something (again) ? :-)
>>
>> ---
>>  kernel/locking/rtmutex.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
>> index 3e746607abe5..36eb232bd29f 100644
>> --- a/kernel/locking/rtmutex.c
>> +++ b/kernel/locking/rtmutex.c
>> @@ -1390,11 +1390,11 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
>>  	} else {
>>  		bool deboost = slowfn(lock, &wake_q);
>>  
>> -		wake_up_q(&wake_q);
>> -
>>  		/* Undo pi boosting if necessary: */
>>  		if (deboost)
>>  			rt_mutex_adjust_prio(current);
>> +
>> +		wake_up_q(&wake_q);
>>  	}
>>  }
> So one potential issue with this -- and this might be reason this code
> is the way it is -- is that the moment we de-boost we can get preempted,
> before having had a chance to wake the higher prio task, getting
> ourselves into a prio-inversion.
>
> But again, that should be fairly simply to fix.

This is cool, I think we should also init "pi_task" properly for INIT_MUTEX and fork,
otherwise looks good to me :-)

Besides, do you think we can kill "pi_waiters_leftmost" from task_struct, as we
can easily get it from "pi_waiters"?

I will test it further with these new changes soon.

Regards,
Xunlei

>
> --
>  kernel/locking/rtmutex.c | 16 +++++++++++++---
>  1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 3e746607abe5..1896baf28e9c 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1390,11 +1390,21 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
>  	} else {
>  		bool deboost = slowfn(lock, &wake_q);
>  
> -		wake_up_q(&wake_q);
> -
> -		/* Undo pi boosting if necessary: */
> +		/*
> +		 * Undo pi boosting (if necessary) and wake top waiter.
> +		 *
> +		 * We should deboost before waking the high-prio task such that
> +		 * we don't run two tasks with the 'same' state. This however
> +		 * can lead to prio-inversion if we would get preempted after
> +		 * the deboost but before waking our high-prio task, hence the
> +		 * preempt_disable.
> +		 */
> +		preempt_disable();
>  		if (deboost)
>  			rt_mutex_adjust_prio(current);
> +
> +		wake_up_q(&wake_q);
> +		preempt_enable();
>  	}
>  }
>  


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-05 10:48                 ` Xunlei Pang
@ 2016-04-05 11:32                   ` Peter Zijlstra
  0 siblings, 0 replies; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-05 11:32 UTC (permalink / raw)
  To: xlpang
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Steven Rostedt, Thomas Gleixner

On Tue, Apr 05, 2016 at 06:48:38PM +0800, Xunlei Pang wrote:
> This is cool, I think we should also init "pi_task" properly for INIT_MUTEX and fork,
> otherwise looks good to me :-)

Indeed..

> Besides, do you think we can kill "pi_waiters_leftmost" from task_struct, as we
> can easily get it from "pi_waiters"?

I don't quickly see a way to do that; different locking rules govern the
two.. which was the whole problem we started with :/

> I will test it further with these new changes soon.

Thanks!


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-05  9:29               ` Peter Zijlstra
  2016-04-05 10:48                 ` Xunlei Pang
@ 2016-04-08 16:25                 ` Steven Rostedt
  2016-04-08 17:38                   ` Peter Zijlstra
  1 sibling, 1 reply; 25+ messages in thread
From: Steven Rostedt @ 2016-04-08 16:25 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: xlpang, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On Tue, 5 Apr 2016 11:29:54 +0200
Peter Zijlstra <peterz@infradead.org> wrote:


> --
>  kernel/locking/rtmutex.c | 16 +++++++++++++---
>  1 file changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 3e746607abe5..1896baf28e9c 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1390,11 +1390,21 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
>  	} else {
>  		bool deboost = slowfn(lock, &wake_q);
>  
> -		wake_up_q(&wake_q);
> -
> -		/* Undo pi boosting if necessary: */
> +		/*
> +		 * Undo pi boosting (if necessary) and wake top waiter.
> +		 *
> +		 * We should deboost before waking the high-prio task such that
> +		 * we don't run two tasks with the 'same' state. This however
> +		 * can lead to prio-inversion if we would get preempted after
> +		 * the deboost but before waking our high-prio task, hence the
> +		 * preempt_disable.
> +		 */
> +		preempt_disable();
>  		if (deboost)
>  			rt_mutex_adjust_prio(current);
> +
> +		wake_up_q(&wake_q);
> +		preempt_enable();
>  	}
>  }
>  

So the preempt_disable() is to allow us to set current back to its
normal priority first before waking up the other task because we don't
want two tasks at the same priority?

Just remember, calling preempt_disable() is semantically the same as
setting your priority as the highest task on the CPU. Thus the above
fix is to set the one task to the highest priority so that we can
deboost it when we remove the highest priority (preempt_enable()),
after we wake up the task that will be lower priority than current
until current calls preempt_enable().

This will of course keep a task that is higher in priority than both
current and the waking task from running till this is all completed.

What's the point of swapping deboost and the wake up again?

Maybe I'm missing something, what exactly do you mean by "same state"?

-- Steve


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-08 16:25                 ` Steven Rostedt
@ 2016-04-08 17:38                   ` Peter Zijlstra
  2016-04-08 18:50                     ` Steven Rostedt
  0 siblings, 1 reply; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-08 17:38 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: xlpang, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On Fri, Apr 08, 2016 at 12:25:10PM -0400, Steven Rostedt wrote:

> So the preempt_disable() is to allow us to set current back to its
> normal priority first before waking up the other task because we don't
> want two tasks at the same priority?

> What's the point of swapping deboost and the wake up again?

In the context of this patch, it ensures the new pi_task pointer points
to something that exists -- this is a rather useful property for a
pointer to have.

It furthermore guarantees that it points to a blocked task, another
useful property.

> Maybe I'm missing something, what exactly do you mean by "same state"?

So the whole point of boosting is to donate the waiter's eligibility to
run to the lock owner. Semantically it is very poor to have two tasks
run based on this one eligibility.

And for pure priority inheritance it doesn't go further than that.

But now consider that we want to implement things like bandwidth inheritance,
where the lock owner actively consumes the runtime budget of the waiter;
then it is very important not to have any overlap in the budget usage.
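
Purely as an illustration of why overlap would matter there (hypothetical
pseudo-code, nothing like this exists in the tree; only the pi_task pointer
from the earlier patch is assumed):

	/*
	 * While boosted, the owner would burn the donor's (top waiter's)
	 * runtime instead of its own, roughly:
	 */
	dl_se = p->pi_task ? &p->pi_task->dl : &p->dl;
	dl_se->runtime -= delta_exec;

If the woken donor starts running while the not-yet-deboosted owner is still
charging against it, the same budget gets consumed twice.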


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-08 17:38                   ` Peter Zijlstra
@ 2016-04-08 18:50                     ` Steven Rostedt
  2016-04-08 18:59                       ` Peter Zijlstra
  0 siblings, 1 reply; 25+ messages in thread
From: Steven Rostedt @ 2016-04-08 18:50 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: xlpang, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On Fri, 8 Apr 2016 19:38:35 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Fri, Apr 08, 2016 at 12:25:10PM -0400, Steven Rostedt wrote:
> 
> > So the preempt_disable() is to allow us to set current back to its
> > normal priority first before waking up the other task because we don't
> > want two tasks at the same priority?  
> 
> > What's the point of swapping deboost and the wake up again?  
> 
> In the context of this patch, it ensures the new pi_task pointer points
> to something that exists -- this is a rather useful property for a
> pointer to have.

It's not clear what would make the new pi_task pointer object no
longer exist from this patch. I take it that waking up the wake_q will
cause something to change in the code of rt_mutex_adjust_prio(current).
If so, it should probably be stated in a comment, because nothing is
obvious here.

> 
> It furthermore guarantees that it points to a blocked task, another
> useful property.

I would think that the slowfn() would have removed anything to do with
what's on the wake_q from current. What task, on what pointer?
I'm only looking at this current patch, not anything to do with the
original patch of this thread. That is, just the swap of waking up
wake_q and calling rt_mutex_adjust_prio().

> 
> > Maybe I'm missing something, what exactly do you mean by "same state"?  
> 
> So the whole point of boosting is to donate the waiters eligibility to
> run to the lock owner. Semantically it is very poor to have two tasks
> run based on this one eligibility.
> 
> And for pure priority inheritance it doesn't go further than that.
> 
> But now consider we want to implement things like bandwidth inheritance,
> where the lock owner actively consumes the runtime budget of the waiter,
> then it is very important to not have any overlap on the budget usage.

If the two tasks (current and the wakee) are on two different CPUs, the
bandwidth charge does make better sense. Again, perhaps a bit more
explanation in the comment would help, as "state" is too generic of a
term to get anything from it.

-- Steve


* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-08 18:50                     ` Steven Rostedt
@ 2016-04-08 18:59                       ` Peter Zijlstra
  2016-04-08 19:15                         ` Steven Rostedt
  2016-04-09  3:25                         ` Xunlei Pang
  0 siblings, 2 replies; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-08 18:59 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: xlpang, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On Fri, Apr 08, 2016 at 02:50:55PM -0400, Steven Rostedt wrote:
> On Fri, 8 Apr 2016 19:38:35 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Fri, Apr 08, 2016 at 12:25:10PM -0400, Steven Rostedt wrote:
> > 
> > > So the preempt_disable() is to allow us to set current back to its
> > > normal priority first before waking up the other task because we don't
> > > want two tasks at the same priority?  
> > 
> > > What's the point of swapping deboost and the wake up again?  
> > 
> > In the context of this patch, it ensures the new pi_task pointer points
> > to something that exists -- this is a rather useful property for a
> > pointer to have.
> 
> It's not clear to what would make the new pi_task pointer object no
> longer exist from this patch. I take it that waking up the wake_q, will
> cause something to change in the code of rt_mutex_adjust_prio(current).
> If so, it should probably be stated in a comment, because nothing is
> obvious here.

It's pretty obvious that a running task can exit :-)

But also, wake_q holds a task ref.
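
(For reference, a minimal sketch of the wake_q reference handling,
abridged from memory rather than copied verbatim from
kernel/sched/core.c: wake_q_add() takes a reference on the task and
wake_up_q() only drops it after the wakeup, so a queued task cannot
disappear under us.)

void wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
	struct wake_q_node *node = &task->wake_q;

	/* Already queued by someone else? That wakeup covers us too. */
	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
		return;

	get_task_struct(task);		/* pin the task until wake_up_q() */
	*head->lastp = node;
	head->lastp = &node->next;
}

void wake_up_q(struct wake_q_head *head)
{
	struct wake_q_node *node = head->first;

	while (node != WAKE_Q_TAIL) {
		struct task_struct *task;

		task = container_of(node, struct task_struct, wake_q);
		node = node->next;
		task->wake_q.next = NULL;

		wake_up_process(task);	/* task may run (or exit) from here on */
		put_task_struct(task);	/* drop the reference from wake_q_add() */
	}
}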

> > It furthermore guarantees that it points to a blocked task, another
> > useful property.
> 
> I would think that the slowfn() would have removed anything to do with
> what's on the wake_q removed from current.

It cannot.

> What task on what pointer.
> I'm only looking at this current patch, not anything to do with the
> original patch of this thread. That is, just the swap of waking up
> wake_q and calling rt_mutex_adjust_prio().

This whole patch was in the context of the previous patch, as should be
clear from the thread.

In any case, I just realized we do not in fact provide this guarantee
(of pointing to a blocked task); that needs a bit more work.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-08 18:59                       ` Peter Zijlstra
@ 2016-04-08 19:15                         ` Steven Rostedt
  2016-04-08 19:28                           ` Steven Rostedt
  2016-04-09  3:25                         ` Xunlei Pang
  1 sibling, 1 reply; 25+ messages in thread
From: Steven Rostedt @ 2016-04-08 19:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: xlpang, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On Fri, 8 Apr 2016 20:59:16 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Fri, Apr 08, 2016 at 02:50:55PM -0400, Steven Rostedt wrote:
> > On Fri, 8 Apr 2016 19:38:35 +0200
> > Peter Zijlstra <peterz@infradead.org> wrote:
> >   
> > > On Fri, Apr 08, 2016 at 12:25:10PM -0400, Steven Rostedt wrote:
> > >   
> > > > So the preempt_disable() is to allow us to set current back to its
> > > > normal priority first before waking up the other task because we don't
> > > > want two tasks at the same priority?    
> > >   
> > > > What's the point of swapping deboost and the wake up again?    
> > > 
> > > In the context of this patch, it ensures the new pi_task pointer points
> > > to something that exists -- this is a rather useful property for a
> > > pointer to have.  
> > 
> > It's not clear to what would make the new pi_task pointer object no
> > longer exist from this patch. I take it that waking up the wake_q, will
> > cause something to change in the code of rt_mutex_adjust_prio(current).
> > If so, it should probably be stated in a comment, because nothing is
> > obvious here.  
> 
> Its pretty obvious that a running task can exit :-)
> 
> But also, wake_q holds a task ref.
> 
> > > It furthermore guarantees that it points to a blocked task, another
> > > useful property.  
> > 
> > I would think that the slowfn() would have removed anything to do with
> > what's on the wake_q removed from current.  
> 
> It cannot.
> 

It cannot?

Maybe I didn't make myself clear in the question.

From what I understand, the slowfn() modifies the task's pi_list (or
rbtree, as it is today). As this is an unlock, the task being woken
(the next one to grab the lock) is removed from the previous task's pi
list.

In rt_mutex_adjust_prio(current) I see it simply grabs current's
pi_lock and calls __rt_mutex_adjust_prio(current). That calls
rt_mutex_getprio(current), which returns current's normal prio if it
doesn't have any pi waiters, or else looks at the top pi waiter on the
task's list and returns that. That waiter wouldn't be the task on the
wake_q, otherwise we wouldn't be deboosting in the first place.
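
(For reference, the pre-patch mainline code I'm describing is roughly
the following; abridged from memory, not a verbatim copy of
kernel/locking/rtmutex.c:)

int rt_mutex_getprio(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return task->normal_prio;

	return min(task_top_pi_waiter(task)->prio,
		   task->normal_prio);
}

static void __rt_mutex_adjust_prio(struct task_struct *task)
{
	int prio = rt_mutex_getprio(task);

	if (task->prio != prio || dl_prio(prio))
		rt_mutex_setprio(task, prio);
}

static void rt_mutex_adjust_prio(struct task_struct *task)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}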

Again, I don't see how the swap has anything to do with pointers
disappearing.

> > What task on what pointer.
> > I'm only looking at this current patch, not anything to do with the
> > original patch of this thread. That is, just the swap of waking up
> > wake_q and calling rt_mutex_adjust_prio().  
> 
> This whole patch was in the context of the previous patch, as should be
> clear from the thread.

OK, so this patch is to be added on top of the previous patch, and
everything should be read in that context. My comment was just about
this change, not about the change in the original patch.

> 
> In any case, I just realized we do not in fact provide this guarantee
> (of pointing to a blocked task) that needs a bit more work.

Looking forward to it ;-)

-- Steve

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-08 19:15                         ` Steven Rostedt
@ 2016-04-08 19:28                           ` Steven Rostedt
  2016-04-09  3:27                             ` Xunlei Pang
  0 siblings, 1 reply; 25+ messages in thread
From: Steven Rostedt @ 2016-04-08 19:28 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: xlpang, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On Fri, 8 Apr 2016 15:15:42 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> From what I understand, the slowfn() modifies the task pi_list (or
> rbtree, as it is today). As this is an unlock, the task being woken
> (the next one to grab the lock) is removed from the previous task's pi
> list.
> 
> In rt_mutex_adjust_prio(current) I see it simply grabs current's
> pi_lock and calls __rt_mutex_adjust_prio(current). This calls
> rt_mutex_getprio(current) which returns current's normal prio if it
> doesn't have any pi waiters, or it looks at the top pi waiter on the
> tasks list and returns that. Which wouldn't be the task on wake_q,
> otherwise we wouldn't be deboosting in the first place.
> 

OK, I now see that your previous patch changes what I'm looking
at :-) This is what happens when you go away, try to catch up on
email, and don't read the emails by thread. I see that
rt_mutex_adjust_prio() is being changed.

I'll go back and look at your previous patch (as I looked at that while
traveling and didn't think too hard about it).

-- Steve

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-08 18:59                       ` Peter Zijlstra
  2016-04-08 19:15                         ` Steven Rostedt
@ 2016-04-09  3:25                         ` Xunlei Pang
  2016-04-09 13:29                           ` Peter Zijlstra
  1 sibling, 1 reply; 25+ messages in thread
From: Xunlei Pang @ 2016-04-09  3:25 UTC (permalink / raw)
  To: Peter Zijlstra, Steven Rostedt
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On 2016/04/09 at 02:59, Peter Zijlstra wrote:
> On Fri, Apr 08, 2016 at 02:50:55PM -0400, Steven Rostedt wrote:
>> On Fri, 8 Apr 2016 19:38:35 +0200
>> Peter Zijlstra <peterz@infradead.org> wrote:
>>
>>> On Fri, Apr 08, 2016 at 12:25:10PM -0400, Steven Rostedt wrote:
>>>
>>>> So the preempt_disable() is to allow us to set current back to its
>>>> normal priority first before waking up the other task because we don't
>>>> want two tasks at the same priority?  
>>>> What's the point of swapping deboost and the wake up again?  
>>> In the context of this patch, it ensures the new pi_task pointer points
>>> to something that exists -- this is a rather useful property for a
>>> pointer to have.
>> It's not clear to what would make the new pi_task pointer object no
>> longer exist from this patch. I take it that waking up the wake_q, will
>> cause something to change in the code of rt_mutex_adjust_prio(current).
>> If so, it should probably be stated in a comment, because nothing is
>> obvious here.
> Its pretty obvious that a running task can exit :-)
>
> But also, wake_q holds a task ref.
>
>>> It furthermore guarantees that it points to a blocked task, another
>>> useful property.
>> I would think that the slowfn() would have removed anything to do with
>> what's on the wake_q removed from current.
> It cannot.
>
>> What task on what pointer.
>> I'm only looking at this current patch, not anything to do with the
>> original patch of this thread. That is, just the swap of waking up
>> wake_q and calling rt_mutex_adjust_prio().
> This whole patch was in the context of the previous patch, as should be
> clear from the thread.
>
> In any case, I just realized we do not in fact provide this guarantee
> (of pointing to a blocked task) that needs a bit more work.

The current patch calls rt_mutex_adjust_prio() before wake_up_q() wakes the wakee.
At that moment the wakee has already been removed from current's pi_waiters by
rt_mutex_slowunlock()->mark_wakeup_next_waiter(), so rt_mutex_adjust_prio() won't
see this wakee; I therefore think this should not be a problem.
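
(Roughly the ordering I mean, paraphrased rather than quoted from the
patch:)

	/* in rt_mutex_slowunlock(), with lock->wait_lock held: */
	mark_wakeup_next_waiter(&wake_q, lock);	/* wakee leaves current's pi_waiters */
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	rt_mutex_adjust_prio(current);	/* deboost: top pi waiter is no longer the wakee */
	wake_up_q(&wake_q);		/* only now is the wakee made runnable */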

Regards,
Xunlei

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-08 19:28                           ` Steven Rostedt
@ 2016-04-09  3:27                             ` Xunlei Pang
  0 siblings, 0 replies; 25+ messages in thread
From: Xunlei Pang @ 2016-04-09  3:27 UTC (permalink / raw)
  To: Steven Rostedt, Peter Zijlstra
  Cc: linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On 2016/04/09 at 03:28, Steven Rostedt wrote:
> On Fri, 8 Apr 2016 15:15:42 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
>
>> From what I understand, the slowfn() modifies the task pi_list (or
>> rbtree, as it is today). As this is an unlock, the task being woken
>> (the next one to grab the lock) is removed from the previous task's pi
>> list.
>>
>> In rt_mutex_adjust_prio(current) I see it simply grabs current's
>> pi_lock and calls __rt_mutex_adjust_prio(current). This calls
>> rt_mutex_getprio(current) which returns current's normal prio if it
>> doesn't have any pi waiters, or it looks at the top pi waiter on the
>> tasks list and returns that. Which wouldn't be the task on wake_q,
>> otherwise we wouldn't be deboosting in the first place.
>>
> OK, I now see that the your previous patch is changing what I'm looking
> at :-) This is what happens when you go away and try to catch up on
> email and not read the emails by threads. I see the
> rt_mutex_adjust_prio() is being changed.
>
> I'll go back and look at your previous patch (as I looked at that while
> traveling and didn't think too hard about it).

Sorry for that; I should add more comments about it, and will add more in the next version.

Regards,
Xunlei

>
> -- Steve

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-09  3:25                         ` Xunlei Pang
@ 2016-04-09 13:29                           ` Peter Zijlstra
  2016-04-10  8:22                             ` Xunlei Pang
  0 siblings, 1 reply; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-09 13:29 UTC (permalink / raw)
  To: xlpang
  Cc: Steven Rostedt, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On Sat, Apr 09, 2016 at 11:25:39AM +0800, Xunlei Pang wrote:

> > In any case, I just realized we do not in fact provide this guarantee
> > (of pointing to a blocked task) that needs a bit more work.
> 
> Current patch calls rt_mutex_adjust_prio() before wake_up_q() the
> wakee, at that moment the wakee has been removed by
> rt_mutex_slowunlock()->mark_wakeup_next_waiter(), from current's
> pi_waiters, rt_mutex_adjust_prio() won't see this wakee, so I think
> this should not be problem.

No, any wakeup after mark_wakeup_next_waiter() will make the task run.
And since we must always consider spurious wakeups, we cannot rely on us
(eg. our wake_up_q call) being the one and only.

Therefore it is possible and the only thing that stands between us and
doom is the fact that the wake_q stuff holds a task reference.

But we cannot guarantee that the task we have a pointer to is in fact
blocked.
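
(A spurious wakeup is harmless for the normal blocking idiom, which is
exactly why we cannot assume our wake_up_q() is the only thing that can
make the wakee run; a generic sketch of that idiom, not the rtmutex
code specifically:)

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (condition)		/* e.g. we became the lock owner */
			break;
		schedule();		/* any wakeup, spurious or not, returns here */
	}
	__set_current_state(TASK_RUNNING);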

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-09 13:29                           ` Peter Zijlstra
@ 2016-04-10  8:22                             ` Xunlei Pang
  2016-04-12  3:08                               ` Xunlei Pang
  0 siblings, 1 reply; 25+ messages in thread
From: Xunlei Pang @ 2016-04-10  8:22 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On 2016/04/09 at 21:29, Peter Zijlstra wrote:
> On Sat, Apr 09, 2016 at 11:25:39AM +0800, Xunlei Pang wrote:
>
>>> In any case, I just realized we do not in fact provide this guarantee
>>> (of pointing to a blocked task) that needs a bit more work.
>> Current patch calls rt_mutex_adjust_prio() before wake_up_q() the
>> wakee, at that moment the wakee has been removed by
>> rt_mutex_slowunlock()->mark_wakeup_next_waiter(), from current's
>> pi_waiters, rt_mutex_adjust_prio() won't see this wakee, so I think
>> this should not be problem.
> No, any wakeup after mark_wakeup_next_waiter() will make the task run.
> And since we must always consider spurious wakeups, we cannot rely on us
> (eg. our wake_up_q call) being the one and only.
>
> Therefore it is possible and the only thing that stands between us and
> doom is the fact that the wake_q stuff holds a task reference.
>
> But we cannot guarantee that the task we have a pointer to is in fact
> blocked.

Does that really matter? The pointer is accessed on behalf of current, and current
will call rt_mutex_adjust_prio() very soon to update it to the right pointer.

Also, the pointer is used to update current's deadline/runtime, and we could restore
these parameters in rt_mutex_setprio() for the deboost case. I just checked the
current code: it does nothing to restore current's deadline/runtime when deboosting;
maybe we can leave that job to future deadline bandwidth inheritance?

Regards,
Xunlei

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-10  8:22                             ` Xunlei Pang
@ 2016-04-12  3:08                               ` Xunlei Pang
  2016-04-12 15:51                                 ` Peter Zijlstra
  0 siblings, 1 reply; 25+ messages in thread
From: Xunlei Pang @ 2016-04-12  3:08 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On 2016/04/10 at 16:22, Xunlei Pang wrote:
> On 2016/04/09 at 21:29, Peter Zijlstra wrote:
>> On Sat, Apr 09, 2016 at 11:25:39AM +0800, Xunlei Pang wrote:
>>
>>>> In any case, I just realized we do not in fact provide this guarantee
>>>> (of pointing to a blocked task) that needs a bit more work.
>>> Current patch calls rt_mutex_adjust_prio() before wake_up_q() the
>>> wakee, at that moment the wakee has been removed by
>>> rt_mutex_slowunlock()->mark_wakeup_next_waiter(), from current's
>>> pi_waiters, rt_mutex_adjust_prio() won't see this wakee, so I think
>>> this should not be problem.
>> No, any wakeup after mark_wakeup_next_waiter() will make the task run.
>> And since we must always consider spurious wakeups, we cannot rely on us
>> (eg. our wake_up_q call) being the one and only.
>>
>> Therefore it is possible and the only thing that stands between us and
>> doom is the fact that the wake_q stuff holds a task reference.
>>
>> But we cannot guarantee that the task we have a pointer to is in fact
>> blocked.
> Does that really matters? the pointer is accessed on behalf of current,  and current
> will call rt_mutex_adjust_prio() very soon to update the right pointer.
>
> Also the pointer is used to update current's deadline/runtime, we can restore these
> params in rt_mutex_setprio() for deboost cases. I just checked current code,  it did
> nothing to restore current's deadline/runtime when deboosting, maybe we can leave
> this job to future deadline bandwidth inheritance?
>
> Regards,
> Xunlei
>

I spotted another issue: we access pi_task without holding any of its locks in
enqueue_task_dl(). The "dl_prio(pi_task->normal_prio)" condition we have is not
enough, because pi_task's "dl_period" and "dl_runtime" can change under us. If it
changed to a !deadline class and dl_runtime was cleared to 0, we will hit an endless
loop in replenish_dl_entity() below:
    while (dl_se->runtime <= 0) {
        dl_se->deadline += pi_se->dl_period;
        dl_se->runtime += pi_se->dl_runtime;
    }

or hit "BUG_ON(pi_se->dl_runtime <= 0);".

That's all because without any lock of that task, there is no guarantee.
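
(For context, the surrounding code in replenish_dl_entity() at the time
looked roughly like this; abridged from memory:)

static void replenish_dl_entity(struct sched_dl_entity *dl_se,
				struct sched_dl_entity *pi_se)
{
	...
	BUG_ON(pi_se->dl_runtime <= 0);

	/* A boosted !deadline task: go with full inherited parameters. */
	if (dl_se->dl_deadline == 0) {
		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
		dl_se->runtime = pi_se->dl_runtime;
	}

	/* Never terminates once pi_se->dl_runtime has been cleared to 0. */
	while (dl_se->runtime <= 0) {
		dl_se->deadline += pi_se->dl_period;
		dl_se->runtime += pi_se->dl_runtime;
	}
	...
}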

So I'm thinking of adding more members to rt_mutex_waiter (we don't lose memory,
it's defined on the stack) and using this structure instead of task_struct as the top
waiter (i.e. using get_pi_top_waiter() instead of get_pi_top_task()), like:

Step1:
struct rt_mutex_waiter {
         int prio;
+       /* updated under waiter's pi_lock and rt_mutex lock */
+       u64 dl_runtime, dl_period;
+       /*
+        * under owner's pi_lock, rq lock, and rt_mutex lock, copied
+        * directly from dl_runtime, dl_period(under same rt_mutex lock).
+        */
+       u64 dl_runtime_copy, dl_period_copy;

Similarly, adding the member in task_struct:
#ifdef CONFIG_RT_MUTEXES
    /* PI waiters blocked on a rt_mutex held by this task */
    struct rb_root pi_waiters;
    struct rb_node *pi_waiters_leftmost;
+  struct rb_node *pi_waiters_leftmost_copy; /* updated under pi_lock and rq lock */
    /* Deadlock detection and priority inheritance handling */
    struct rt_mutex_waiter *pi_blocked_on;
#endif

Then we can update "pi_waiters_leftmost_copy" together with "dl_runtime_copy"
and "dl_period_copy" under the rq lock, so enqueue_task_dl() can access them
without any problem.

Step2:
We must update "dl_runtime_copy" and "dl_period_copy" under the rt_mutex lock,
because they are copied from "dl_runtime" and "dl_period" of rt_mutex_waiter. So
we add a new update function, called while holding both the rq lock and the
rt_mutex lock, mainly implemented in rt_mutex_setprio().
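
A rough sketch of such a helper, with purely illustrative names and
assuming the copy fields proposed in Step1:

/*
 * Hypothetical helper, name illustrative only.  Caller holds the
 * owner's pi_lock, its rq lock and the rt_mutex lock.
 */
static void rt_mutex_update_top_waiter_copy(struct task_struct *owner)
{
	struct rt_mutex_waiter *top;

	if (!task_has_pi_waiters(owner)) {
		owner->pi_waiters_leftmost_copy = NULL;
		return;
	}

	top = task_top_pi_waiter(owner);
	top->dl_runtime_copy = top->dl_runtime;
	top->dl_period_copy  = top->dl_period;
	owner->pi_waiters_leftmost_copy = &top->pi_tree_entry;
}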

Step3:
Note that rt_mutex_setprio() can be called without the rtmutex lock held, via
rt_mutex_adjust_prio(); we can add a parameter to indicate that the copy-updating
work should be skipped there (i.e. add a new waiter parameter to rt_mutex_setprio()
and keep the original "prio" parameter). Then we can do the copy-updating work in
mark_wakeup_next_waiter() before unlocking current's pi_lock, as long as we hold
the rq lock, because the rtmutex lock and the owner's pi_lock are already held.

This can also solve the issue you mentioned with only a little overhead involved.

Regards,
Xunlei

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-12  3:08                               ` Xunlei Pang
@ 2016-04-12 15:51                                 ` Peter Zijlstra
  2016-04-13  2:13                                   ` Xunlei Pang
  0 siblings, 1 reply; 25+ messages in thread
From: Peter Zijlstra @ 2016-04-12 15:51 UTC (permalink / raw)
  To: xlpang
  Cc: Steven Rostedt, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On Tue, Apr 12, 2016 at 11:08:04AM +0800, Xunlei Pang wrote:
> I spotted another issue, we access pi_task without any lock in enqueue_task_dl(),

OK, so I'm on the road and entirely jetlagged, but how can
enqueue_task_dl() not have rq->lock held?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
  2016-04-12 15:51                                 ` Peter Zijlstra
@ 2016-04-13  2:13                                   ` Xunlei Pang
  0 siblings, 0 replies; 25+ messages in thread
From: Xunlei Pang @ 2016-04-13  2:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, linux-kernel, Juri Lelli, Ingo Molnar, Thomas Gleixner

On 2016/04/12 at 23:51, Peter Zijlstra wrote:
> On Tue, Apr 12, 2016 at 11:08:04AM +0800, Xunlei Pang wrote:
>> I spotted another issue, we access pi_task without any lock in enqueue_task_dl(),
> OK, so I'm on the road and entirely jetlagged, but how can
> enqueue_task_dl() not have rq->lock held?

It does hold current's rq->lock, but it holds none of pi_task's locks, so pi_task's
attributes can change concurrently with the subsequent pi_se->dl_runtime and
pi_se->dl_period accesses and cause the issues above.
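
(The access in question, abridged from memory from the enqueue_task_dl()
of that era:)

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
	struct task_struct *pi_task = rt_mutex_get_top_task(p);
	struct sched_dl_entity *pi_se = &p->dl;

	/*
	 * rq->lock here is p's rq lock; nothing pins pi_task's
	 * scheduling parameters, which can change concurrently.
	 */
	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio))
		pi_se = &pi_task->dl;
	...
	enqueue_dl_entity(&p->dl, pi_se, flags);
	...
}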

Regards,
Xunlei

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2016-04-13  2:13 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-01 11:00 [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks Xunlei Pang
2016-04-01 11:38 ` Peter Zijlstra
2016-04-01 12:23   ` Xunlei Pang
2016-04-01 13:12     ` Peter Zijlstra
2016-04-01 13:34       ` Xunlei Pang
2016-04-01 21:51         ` Peter Zijlstra
2016-04-02 10:19           ` Xunlei Pang
2016-04-05  8:38           ` Xunlei Pang
2016-04-05  9:19             ` Peter Zijlstra
2016-04-05  9:29               ` Peter Zijlstra
2016-04-05 10:48                 ` Xunlei Pang
2016-04-05 11:32                   ` Peter Zijlstra
2016-04-08 16:25                 ` Steven Rostedt
2016-04-08 17:38                   ` Peter Zijlstra
2016-04-08 18:50                     ` Steven Rostedt
2016-04-08 18:59                       ` Peter Zijlstra
2016-04-08 19:15                         ` Steven Rostedt
2016-04-08 19:28                           ` Steven Rostedt
2016-04-09  3:27                             ` Xunlei Pang
2016-04-09  3:25                         ` Xunlei Pang
2016-04-09 13:29                           ` Peter Zijlstra
2016-04-10  8:22                             ` Xunlei Pang
2016-04-12  3:08                               ` Xunlei Pang
2016-04-12 15:51                                 ` Peter Zijlstra
2016-04-13  2:13                                   ` Xunlei Pang
