linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: Davidlohr Bueso <dave@stgolabs.net>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Mike Galbraith <umgwanakikbuti@gmail.com>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v2] futex: lower the lock contention on the HB lock during wake up
Date: Wed, 17 Jun 2015 10:33:50 +0200	[thread overview]
Message-ID: <20150617083350.GA2433@linutronix.de> (raw)
In-Reply-To: <1434484226.1903.19.camel@stgolabs.net>

wake_futex_pi() wakes the task before releasing the hash bucket lock
(HB). The first thing the woken up task usually does is to acquire the
lock which requires the HB lock. On SMP Systems this leads to blocking
on the HB lock which is released by the owner shortly after.
This patch rearranges the unlock path by first releasing the HB lock and
then waking up the task.

[bigeasy: redo ontop of lockless wake-queues]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
* Davidlohr Bueso | 2015-06-16 12:50:26 [-0700]:

>I prefer having two separate patches, thus keeping their own changelog
>for the change justification.

okay, here it is on top of #1.

 kernel/futex.c                  | 32 +++++++++++++++++++++++---
 kernel/locking/rtmutex.c        | 51 +++++++++++++++++++++++++++++------------
 kernel/locking/rtmutex_common.h |  3 +++
 3 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index ea6ca0bca525..026594f02bd2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1117,12 +1117,15 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
 	q->lock_ptr = NULL;
 }
 
-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+			 struct futex_hash_bucket *hb)
 {
 	struct task_struct *new_owner;
 	struct futex_pi_state *pi_state = this->pi_state;
 	u32 uninitialized_var(curval), newval;
 	int ret = 0;
+	WAKE_Q(wake_q);
+	bool deboost;
 
 	if (!pi_state)
 		return -EINVAL;
@@ -1173,7 +1176,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	raw_spin_unlock_irq(&new_owner->pi_lock);
 
 	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
-	rt_mutex_unlock(&pi_state->pi_mutex);
+
+	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+
+	/*
+	 * First unlock HB so the waiter does not spin on it once he got woken
+	 * up. Second wake up the waiter before the priority is adjusted. If we
+	 * deboost first (and lose our higher priority), then the task might get
+	 * scheduled away before the wake up can take place.
+	 */
+	spin_unlock(&hb->lock);
+	wake_up_q(&wake_q);
+	if (deboost)
+		rt_mutex_adjust_prio(current);
 
 	return 0;
 }
@@ -2410,13 +2425,23 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 	 */
 	match = futex_top_waiter(hb, &key);
 	if (match) {
-		ret = wake_futex_pi(uaddr, uval, match);
+		ret = wake_futex_pi(uaddr, uval, match, hb);
+		/*
+		 * In case of success wake_futex_pi dropped the hash
+		 * bucket lock.
+		 */
+		if (!ret)
+			goto out_putkey;
 		/*
 		 * The atomic access to the futex value generated a
 		 * pagefault, so retry the user-access and the wakeup:
 		 */
 		if (ret == -EFAULT)
 			goto pi_faulted;
+		/*
+		 * wake_futex_pi has detected invalid state. Tell user
+		 * space.
+		 */
 		goto out_unlock;
 	}
 
@@ -2437,6 +2462,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 
 out_unlock:
 	spin_unlock(&hb->lock);
+out_putkey:
 	put_futex_key(&key);
 	return ret;
 
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 74188d83b065..53fab686a1c2 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -300,7 +300,7 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
  * of task. We do not use the spin_xx_mutex() variants here as we are
  * outside of the debug path.)
  */
-static void rt_mutex_adjust_prio(struct task_struct *task)
+void rt_mutex_adjust_prio(struct task_struct *task)
 {
 	unsigned long flags;
 
@@ -1244,13 +1244,12 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 }
 
 /*
- * Slow path to release a rt-mutex:
+ * Slow path to release a rt-mutex.
+ * Return whether the current task needs to undo a potential priority boosting.
  */
-static void __sched
-rt_mutex_slowunlock(struct rt_mutex *lock)
+static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+					struct wake_q_head *wake_q)
 {
-	WAKE_Q(wake_q);
-
 	raw_spin_lock(&lock->wait_lock);
 
 	debug_rt_mutex_unlock(lock);
@@ -1291,7 +1290,7 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
 	while (!rt_mutex_has_waiters(lock)) {
 		/* Drops lock->wait_lock ! */
 		if (unlock_rt_mutex_safe(lock) == true)
-			return;
+			return false;
 		/* Relock the rtmutex and try again */
 		raw_spin_lock(&lock->wait_lock);
 	}
@@ -1302,13 +1301,12 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
 	 *
 	 * Queue the next waiter for wakeup once we release the wait_lock.
 	 */
-	mark_wakeup_next_waiter(&wake_q, lock);
+	mark_wakeup_next_waiter(wake_q, lock);
 
 	raw_spin_unlock(&lock->wait_lock);
-	wake_up_q(&wake_q);
 
-	/* Undo pi boosting if necessary: */
-	rt_mutex_adjust_prio(current);
+	/* check PI boosting */
+	return true;
 }
 
 /*
@@ -1359,12 +1357,18 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
 
 static inline void
 rt_mutex_fastunlock(struct rt_mutex *lock,
-		    void (*slowfn)(struct rt_mutex *lock))
+		    bool (*slowfn)(struct rt_mutex *lock,
+				   struct wake_q_head *wqh))
 {
-	if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
+	WAKE_Q(wake_q);
+
+	if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
 		rt_mutex_deadlock_account_unlock(current);
-	else
-		slowfn(lock);
+
+	} else if (slowfn(lock, &wake_q)) {
+		/* Undo pi boosting if necessary: */
+		rt_mutex_adjust_prio(current);
+	}
 }
 
 /**
@@ -1466,6 +1470,23 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
 
 /**
+ * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
+ * @lock: the rt_mutex to be unlocked
+ *
+ * Returns: true/false indicating whether priority adjustment is
+ * required or not.
+ */
+bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
+				   struct wake_q_head *wqh)
+{
+	if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
+		rt_mutex_deadlock_account_unlock(current);
+		return false;
+	}
+	return rt_mutex_slowunlock(lock, wqh);
+}
+
+/**
  * rt_mutex_destroy - mark a mutex unusable
  * @lock: the mutex to be destroyed
  *
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 855212501407..7844f8f0e639 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -131,6 +131,9 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 				      struct hrtimer_sleeper *to,
 				      struct rt_mutex_waiter *waiter);
 extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
+extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
+				  struct wake_q_head *wqh);
+extern void rt_mutex_adjust_prio(struct task_struct *task);
 
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # include "rtmutex-debug.h"
-- 
2.1.4


  reply	other threads:[~2015-06-17  8:34 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-19 17:24 [PATCH -tip 0/4] rtmutex: Spin on owner Davidlohr Bueso
2015-05-19 17:24 ` [PATCH 1/4] locking/rtmutex: Implement lockless top-waiter wakeup Davidlohr Bueso
2015-06-05 12:35   ` Thomas Gleixner
2015-06-16 19:29   ` [PATCH] futex: lower the lock contention on the HB lock during wake up Sebastian Andrzej Siewior
2015-06-16 19:50     ` Davidlohr Bueso
2015-06-17  8:33       ` Sebastian Andrzej Siewior [this message]
2015-06-17 14:17         ` [PATCH v2] " Mike Galbraith
2015-06-17 14:28           ` Sebastian Andrzej Siewior
2015-06-17 14:31             ` Mike Galbraith
2015-06-21  4:35             ` Mike Galbraith
2015-06-18 20:30         ` [tip:sched/core] futex: Lower " tip-bot for Sebastian Andrzej Siewior
2015-06-19 17:51         ` [PATCH v2] futex: lower " Kevin Hilman
2015-06-19 18:54           ` Thomas Gleixner
2015-06-19 19:32             ` Kevin Hilman
2015-06-19 19:33         ` [tip:sched/locking] futex: Lower " tip-bot for Sebastian Andrzej Siewior
2015-06-18 20:30   ` [tip:sched/core] locking/rtmutex: Implement lockless top-waiter wakeup tip-bot for Davidlohr Bueso
2015-05-19 17:24 ` [PATCH 2/4] locking/rtmutex: Use cmp-cmpxchg Davidlohr Bueso
2015-06-05 12:38   ` Thomas Gleixner
2015-06-06 15:27     ` Davidlohr Bueso
2015-06-15 18:34       ` Jason Low
2015-06-15 19:37         ` Davidlohr Bueso
2015-06-16  1:00           ` Jason Low
2015-05-19 17:24 ` [PATCH 3/4] locking/rtmutex: Update stale plist comments Davidlohr Bueso
2015-06-05 12:39   ` Thomas Gleixner
2015-06-18 20:57   ` [tip:sched/core] " tip-bot for Davidlohr Bueso
2015-06-19 19:33   ` [tip:sched/locking] " tip-bot for Davidlohr Bueso
2015-05-19 17:24 ` [PATCH -rfc 4/4] locking/rtmutex: Support spin on owner (osq) Davidlohr Bueso
2015-05-20  7:11   ` Paul Bolle
2015-05-25 20:35     ` Davidlohr Bueso
2015-05-29 15:19   ` Davidlohr Bueso
2015-05-29 18:01     ` Davidlohr Bueso
2015-06-05 13:59   ` Thomas Gleixner
2015-06-09  4:41     ` Davidlohr Bueso
2015-06-09  9:29       ` Thomas Gleixner
2015-06-09 11:21         ` Peter Zijlstra
2015-06-09 12:53           ` Thomas Gleixner
2015-05-25 20:35 ` [PATCH -tip 0/4] rtmutex: Spin on owner Davidlohr Bueso
2015-05-26 19:05   ` Thomas Gleixner
2015-09-15  1:26 [PATCH v2] futex: lower the lock contention on the HB lock during wake up Zhu Jefferry
2015-09-16  0:01 ` Thomas Gleixner
2015-09-16  0:17   ` Zhu Jefferry
2015-09-16  8:06     ` Thomas Gleixner
2015-09-16  9:52       ` Zhu Jefferry
2015-09-16 10:22         ` Thomas Gleixner
2015-09-16 11:13           ` Zhu Jefferry
2015-09-16 13:39             ` Thomas Gleixner
2015-09-16 23:57               ` Zhu Jefferry
2015-09-17  7:08                 ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150617083350.GA2433@linutronix.de \
    --to=bigeasy@linutronix.de \
    --cc=dave@stgolabs.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=umgwanakikbuti@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).