From: Ingo Molnar <mingo@kernel.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Thomas Gleixner <tglx@linutronix.de>,
	"Paul E. McKenney" <paulmck@us.ibm.com>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [GIT PULL] locking fixes
Date: Fri, 10 Jun 2016 14:45:15 +0200
Message-ID: <20160610124515.GA30903@gmail.com>

Linus,

Please pull the latest locking-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus

   # HEAD: 077fa7aed17de5022e44bf07dbaf732078b7b5b2 futex: Calculate the futex key based on a tail page for file-based futexes

Misc fixes:

 - a file-based futex fix
 - one more spin_unlock_wait() fix
 - a ww-mutex deadlock detection improvement/fix
 - and a raw_read_seqcount_latch() barrier fix.

 Thanks,

	Ingo

------------------>
Chris Wilson (1):
      locking/ww_mutex: Report recursive ww_mutex locking early

Mel Gorman (1):
      futex: Calculate the futex key based on a tail page for file-based futexes

Peter Zijlstra (2):
      locking/seqcount: Re-fix raw_read_seqcount_latch()
      locking/qspinlock: Fix spin_unlock_wait() some more


 include/asm-generic/qspinlock.h | 53 ++++++++++++------------------------
 include/linux/seqlock.h         |  7 +++--
 kernel/futex.c                  | 14 +++++++---
 kernel/locking/mutex.c          |  9 ++++---
 kernel/locking/qspinlock.c      | 60 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 99 insertions(+), 44 deletions(-)

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 6bd05700d8c9..05f05f17a7c2 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -22,37 +22,33 @@
 #include <asm-generic/qspinlock_types.h>
 
 /**
+ * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * There is a very slight possibility of live-lock if the lockers keep coming
+ * and the waiter is just unfortunate enough to not see any unlock state.
+ */
+#ifndef queued_spin_unlock_wait
+extern void queued_spin_unlock_wait(struct qspinlock *lock);
+#endif
+
+/**
  * queued_spin_is_locked - is the spinlock locked?
  * @lock: Pointer to queued spinlock structure
  * Return: 1 if it is locked, 0 otherwise
  */
+#ifndef queued_spin_is_locked
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
-	 * queued_spin_lock_slowpath() can ACQUIRE the lock before
-	 * issuing the unordered store that sets _Q_LOCKED_VAL.
-	 *
-	 * See both smp_cond_acquire() sites for more detail.
-	 *
-	 * This however means that in code like:
-	 *
-	 *   spin_lock(A)		spin_lock(B)
-	 *   spin_unlock_wait(B)	spin_is_locked(A)
-	 *   do_something()		do_something()
-	 *
-	 * Both CPUs can end up running do_something() because the store
-	 * setting _Q_LOCKED_VAL will pass through the loads in
-	 * spin_unlock_wait() and/or spin_is_locked().
+	 * See queued_spin_unlock_wait().
 	 *
-	 * Avoid this by issuing a full memory barrier between the spin_lock()
-	 * and the loads in spin_unlock_wait() and spin_is_locked().
-	 *
-	 * Note that regular mutual exclusion doesn't care about this
-	 * delayed store.
+	 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
+	 * isn't immediately observable.
 	 */
-	smp_mb();
-	return atomic_read(&lock->val) & _Q_LOCKED_MASK;
+	return atomic_read(&lock->val);
 }
+#endif
 
 /**
  * queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -122,21 +118,6 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 }
 #endif
 
-/**
- * queued_spin_unlock_wait - wait until current lock holder releases the lock
- * @lock : Pointer to queued spinlock structure
- *
- * There is a very slight possibility of live-lock if the lockers keep coming
- * and the waiter is just unfortunate enough to not see any unlock state.
- */
-static inline void queued_spin_unlock_wait(struct qspinlock *lock)
-{
-	/* See queued_spin_is_locked() */
-	smp_mb();
-	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
-		cpu_relax();
-}
-
 #ifndef virt_spin_lock
 static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 {
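
For background, here is a minimal sketch of the cross-locking idiom that the
comment removed above describes and that the new ordering rules must keep
correct; it is illustrative only (the lock names and do_something() are
placeholders, not taken from the patch):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(lock_a);
static DEFINE_SPINLOCK(lock_b);

void cpu0_path(void)				/* runs on CPU 0 */
{
	spin_lock(&lock_a);
	spin_unlock_wait(&lock_b);		/* wait out any holder of B */
	do_something();				/* must not overlap with CPU 1 */
	spin_unlock(&lock_a);
}

void cpu1_path(void)				/* runs on CPU 1 */
{
	spin_lock(&lock_b);
	if (!spin_is_locked(&lock_a))
		do_something();			/* must not overlap with CPU 0 */
	spin_unlock(&lock_b);
}

If the store that sets _Q_LOCKED_VAL can pass the loads in spin_unlock_wait()
and spin_is_locked(), both CPUs can end up running do_something() at the same
time, which is exactly what the qspinlock.c changes further below prevent.
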
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 7973a821ac58..ead97654c4e9 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -277,7 +277,10 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
 
 static inline int raw_read_seqcount_latch(seqcount_t *s)
 {
-	return lockless_dereference(s)->sequence;
+	int seq = READ_ONCE(s->sequence);
+	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
+	smp_read_barrier_depends();
+	return seq;
 }
 
 /**
@@ -331,7 +334,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  *	unsigned seq, idx;
  *
  *	do {
- *		seq = lockless_dereference(latch)->seq;
+ *		seq = raw_read_seqcount_latch(&latch->seq);
  *
  *		idx = seq & 0x01;
  *		entry = data_query(latch->data[idx], ...);
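
For readers without seqlock.h open: the hunk above is part of the latch usage
example documented in the header. The full read side of that example looks
roughly like this (latch_struct, struct data and data_query() are placeholders
from the header's own documentation, not real APIs):

struct latch_struct {
	seqcount_t	seq;
	struct data	data[2];
};

struct entry *latch_query(struct latch_struct *latch, ...)
{
	struct entry *entry;
	unsigned seq, idx;

	do {
		/* Now a READ_ONCE() + smp_read_barrier_depends(), per the fix above */
		seq = raw_read_seqcount_latch(&latch->seq);

		idx = seq & 0x01;
		entry = data_query(latch->data[idx], ...);

		/* Make sure the data read completes before re-checking the sequence */
		smp_rmb();
	} while (seq != latch->seq.sequence);

	return entry;
}
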
diff --git a/kernel/futex.c b/kernel/futex.c
index ee25f5ba4aca..33664f70e2d2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page;
+	struct page *page, *tail;
 	struct address_space *mapping;
 	int err, ro = 0;
 
@@ -530,7 +530,15 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	 * considered here and page lock forces unnecessarily serialization
 	 * From this point on, mapping will be re-verified if necessary and
 	 * page lock will be acquired only if it is unavoidable
-	 */
+	 *
+	 * Mapping checks require the head page for any compound page so the
+	 * head page and mapping is looked up now. For anonymous pages, it
+	 * does not matter if the page splits in the future as the key is
+	 * based on the address. For filesystem-backed pages, the tail is
+	 * required as the index of the page determines the key. For
+	 * base pages, there is no tail page and tail == page.
+	 */
+	tail = page;
 	page = compound_head(page);
 	mapping = READ_ONCE(page->mapping);
 
@@ -654,7 +662,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.inode = inode;
-		key->shared.pgoff = basepage_index(page);
+		key->shared.pgoff = basepage_index(tail);
 		rcu_read_unlock();
 	}
 
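To illustrate what a "file-based futex" is in the first place, here is a
minimal user-space sketch of the case whose key computation changes above: a
futex word living in a MAP_SHARED file mapping. Illustrative only (the path
name is arbitrary, error handling is omitted):

#include <fcntl.h>
#include <linux/futex.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static long futex(uint32_t *uaddr, int op, uint32_t val)
{
	return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = open("/tmp/futex-demo", O_RDWR | O_CREAT, 0600);
	uint32_t *f;

	ftruncate(fd, page);

	/* MAP_SHARED on a file: get_futex_key() takes the inode branch and
	 * keys the futex on (inode, page index, offset within the page). */
	f = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	*f = 1;
	/* Wake any task in another process that mapped the same file page
	 * and is sleeping in FUTEX_WAIT on it; matching the two mappings
	 * relies on that inode + page-index key. */
	futex(f, FUTEX_WAKE, 1);

	munmap(f, page);
	close(fd);
	return 0;
}
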
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index e364b424b019..79d2d765a75f 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 	if (!hold_ctx)
 		return 0;
 
-	if (unlikely(ctx == hold_ctx))
-		return -EALREADY;
-
 	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
 	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	unsigned long flags;
 	int ret;
 
+	if (use_ww_ctx) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+			return -EALREADY;
+	}
+
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
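For reference, this is what the early check means for ww_mutex users; a
minimal sketch (my_ww_class and demo() are illustrative, not from the patch)
of re-locking a ww_mutex within the same acquire context:

#include <linux/bug.h>
#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(my_ww_class);		/* illustrative class */

static void demo(struct ww_mutex *lock)
{
	struct ww_acquire_ctx ctx;

	ww_acquire_init(&ctx, &my_ww_class);

	WARN_ON(ww_mutex_lock(lock, &ctx) != 0);

	/* With the hunk above, a recursive acquisition in the same acquire
	 * context fails immediately, rather than only after the task has
	 * queued up and busy-spun on a lock it already owns. */
	WARN_ON(ww_mutex_lock(lock, &ctx) != -EALREADY);

	ww_mutex_unlock(lock);
	ww_acquire_fini(&ctx);
}
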
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e32ae1..5fc8c311b8fe 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -267,6 +267,66 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
+ *
+ *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourseves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
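
One note on the final "CTRL + RMB -> ACQUIRE" barrier in the new helper: the
wait loops exit via branches that depend on the lock-word load, and that
control dependency already orders later stores; the smp_rmb() extends this to
later loads, so together they give acquire-like ordering. A toy illustration
(wait_then_touch() and the shared_* pointers are placeholders, not from the
patch):

#include <linux/spinlock.h>

/* On qspinlock architectures this spin_unlock_wait() ends up in the
 * queued_spin_unlock_wait() added above. */
void wait_then_touch(spinlock_t *lock, int *shared_a, int *shared_b)
{
	int r1;

	spin_unlock_wait(lock);

	/* Neither access below can be reordered before the final
	 * lock-word load inside the wait loop: */
	r1 = READ_ONCE(*shared_a);	/* ordered by the smp_rmb() */
	WRITE_ONCE(*shared_b, 1);	/* ordered by the control dependency */
	(void)r1;
}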
