linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mgorman@suse.de>
To: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Will Deacon <will@kernel.org>, Jan Kara <jack@suse.cz>,
	Waiman Long <longman@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Steven Rostedt <rostedt@goodmis.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Catalin Marinas <catalin.marinas@arm.com>
Subject: Re: Crash with PREEMPT_RT on aarch64 machine
Date: Thu, 1 Dec 2022 17:09:41 +0000	[thread overview]
Message-ID: <20221201170941.zxorepk6zjrhprg4@suse.de> (raw)
In-Reply-To: <20221130202204.usku3rl6wowiugju@suse.de>

On Wed, Nov 30, 2022 at 08:22:10PM +0000, Mel Gorman wrote:
> >  extern void rt_mutex_base_init(struct rt_mutex_base *rtb);
> > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> > index 7779ee8abc2a0..e3cc673e0c988 100644
> > --- a/kernel/locking/rtmutex.c
> > +++ b/kernel/locking/rtmutex.c
> > @@ -97,7 +97,7 @@ rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
> >  	if (rt_mutex_has_waiters(lock))
> >  		val |= RT_MUTEX_HAS_WAITERS;
> >  
> > -	WRITE_ONCE(lock->owner, (struct task_struct *)val);
> > +	WARN_ON_ONCE(cmpxchg_acquire(&lock->owner, RT_MUTEX_HAS_WAITERS, val) != RT_MUTEX_HAS_WAITERS);
> >  }
> >  
> >  static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
> > @@ -106,6 +106,17 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
> >  			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
> >  }
> >  
> > +static __always_inline void
> > +rt_mutex_set_owner_pi(struct rt_mutex_base *lock, struct task_struct *owner)
> > +{
> 
> What does pi mean in this context? I think the naming here might be
> misleading. rt_mutex_set_owner_pi is used when initialising and when
> clearing the owner. rt_mutex_set_owner is used when acquiring the lock.
> 
> Consider renaming rt_mutex_set_owner_pi to rt_mutex_clear_owner. The init
> could still use rt_mutex_set_owner as an extra barrier is not a big deal
> during init if the straight assignment was unpopular.  The init could also
> do a plain assignment because it cannot have any waiters yet.
> 
> What is less obvious is if rt_mutex_clear_owner should have explicit release
> semantics to pair with rt_mutex_set_owner. It looks like it might not
> matter because at least some paths end up having release semantics anyway
> due to a spinlock but I didn't check all cases and it's potentially fragile.
> 

There are no release semantics, so how about this? It passed 1 iteration,
but I will leave it running 10 times overnight.

---
 kernel/locking/rtmutex.c     | 48 +++++++++++++++++++++++++++++++++++---------
 kernel/locking/rtmutex_api.c |  6 +++---
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 7779ee8abc2a..35212f260148 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -89,15 +89,33 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
  * set this bit before looking at the lock.
  */
 
-static __always_inline void
-rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+static __always_inline struct task_struct *
+rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
 {
 	unsigned long val = (unsigned long)owner;
 
 	if (rt_mutex_has_waiters(lock))
 		val |= RT_MUTEX_HAS_WAITERS;
 
-	WRITE_ONCE(lock->owner, (struct task_struct *)val);
+	return (struct task_struct *)val;
+}
+
+
+static __always_inline void
+rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+{
+	/*
+	 * lock->wait_lock is held but explicit acquire semantics are needed
+	 * for a new lock owner so WRITE_ONCE is insufficient.
+	 */
+	xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
+}
+
+static __always_inline void
+rt_mutex_clear_owner(struct rt_mutex_base *lock)
+{
+	/* lock->wait_lock is held so the unlock provides release semantics. */
+	WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
 }
 
 static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
@@ -106,7 +124,8 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
 			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
 }
 
-static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
+static __always_inline void
+fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
 {
 	unsigned long owner, *p = (unsigned long *) &lock->owner;
 
@@ -172,8 +191,19 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
 	 * still set.
 	 */
 	owner = READ_ONCE(*p);
-	if (owner & RT_MUTEX_HAS_WAITERS)
-		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+	if (owner & RT_MUTEX_HAS_WAITERS) {
+		/*
+		 * See comments in rt_mutex_set_owner and
+		 * rt_mutex_clear_owner on why xchg_acquire is used for
+		 * updating owner for locking and WRITE_ONCE for unlocking.
+		 * WRITE_ONCE would work for both here although other lock
+		 * acquisitions may enter the slow path unnecessarily.
+		 */
+		if (acquire_lock)
+			xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
+		else
+			WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+	}
 }
 
 /*
@@ -1243,7 +1273,7 @@ static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
 	 * try_to_take_rt_mutex() sets the lock waiters bit
 	 * unconditionally. Clean this up.
 	 */
-	fixup_rt_mutex_waiters(lock);
+	fixup_rt_mutex_waiters(lock, true);
 
 	return ret;
 }
@@ -1604,7 +1634,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 	 * try_to_take_rt_mutex() sets the waiter bit
 	 * unconditionally. We might have to fix that up.
 	 */
-	fixup_rt_mutex_waiters(lock);
+	fixup_rt_mutex_waiters(lock, true);
 
 	trace_contention_end(lock, ret);
 
@@ -1719,7 +1749,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
 	 * We might have to fix that up:
 	 */
-	fixup_rt_mutex_waiters(lock);
+	fixup_rt_mutex_waiters(lock, true);
 	debug_rt_mutex_free_waiter(&waiter);
 
 	trace_contention_end(lock, 0);
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 900220941caa..cb9fdff76a8a 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -267,7 +267,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
 void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
 {
 	debug_rt_mutex_proxy_unlock(lock);
-	rt_mutex_set_owner(lock, NULL);
+	rt_mutex_clear_owner(lock);
 }
 
 /**
@@ -382,7 +382,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
 	 * have to fix that up.
 	 */
-	fixup_rt_mutex_waiters(lock);
+	fixup_rt_mutex_waiters(lock, true);
 	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
@@ -438,7 +438,7 @@ bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
 	 * have to fix that up.
 	 */
-	fixup_rt_mutex_waiters(lock);
+	fixup_rt_mutex_waiters(lock, false);
 
 	raw_spin_unlock_irq(&lock->wait_lock);
 



  reply	other threads:[~2022-12-01 17:09 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-03 11:54 Crash with PREEMPT_RT on aarch64 machine Jan Kara
2022-11-04 16:30 ` Sebastian Andrzej Siewior
2022-11-07 13:56   ` Jan Kara
2022-11-07 15:10     ` Sebastian Andrzej Siewior
2022-11-07 16:30       ` Jan Kara
2022-11-07 17:12         ` Sebastian Andrzej Siewior
2022-11-07 16:49       ` Waiman Long
2022-11-08 10:53         ` Mark Rutland
2022-11-08 17:45           ` Jan Kara
2022-11-09  9:55             ` Mark Rutland
2022-11-09 10:11               ` Pierre Gondois
2022-11-09 10:54                 ` Jan Kara
2022-11-09 11:01               ` Jan Kara
2022-11-09 13:52                 ` Pierre Gondois
2022-11-09 14:21                   ` Pierre Gondois
2022-11-09 12:57         ` Will Deacon
2022-11-09 15:40           ` Jan Kara
2022-11-11 14:27             ` Jan Kara
2022-11-14 12:41               ` Will Deacon
2022-11-28 15:58                 ` Sebastian Andrzej Siewior
2022-11-28 20:30                   ` kernel test robot
2022-11-28 21:11                   ` kernel test robot
2022-11-29  5:16                   ` kernel test robot
2022-11-29  5:26                   ` kernel test robot
2022-11-29  6:48                   ` kernel test robot
2022-11-29  7:39                   ` kernel test robot
2022-11-30 17:20                   ` Pierre Gondois
2022-12-01 12:37                     ` Jan Kara
2022-11-30 20:22                   ` Mel Gorman
2022-12-01 17:09                     ` Mel Gorman [this message]
     [not found] ` <20221104080637.626-1-hdanton@sina.com>
2022-11-07 12:41   ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221201170941.zxorepk6zjrhprg4@suse.de \
    --to=mgorman@suse.de \
    --cc=bigeasy@linutronix.de \
    --cc=catalin.marinas@arm.com \
    --cc=jack@suse.cz \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).