All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gratian Crisan <gratian.crisan@ni.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Gratian Crisan <gratian.crisan@ni.com>,
	Julia Cartwright <julia@ni.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	linux-kernel@vger.kernel.org, Darren Hart <dvhart@infradead.org>,
	Ingo Molnar <mingo@redhat.com>
Subject: Re: PI futexes + lock stealing woes
Date: Thu, 07 Dec 2017 08:57:59 -0600	[thread overview]
Message-ID: <87mv2ur3ew.fsf@kerf.amer.corp.natinst.com> (raw)
In-Reply-To: <20171207142648.n4h3vzyajw2zlxv2@hirez.programming.kicks-ass.net>


Peter Zijlstra writes:
> The below compiles and boots, but is otherwise untested. Could you give
> it a spin?

Thank you! Yes, I'll start a test now.

-Gratian

> ---
>  kernel/futex.c                  | 83 +++++++++++++++++++++++++++++++++--------
>  kernel/locking/rtmutex.c        | 26 +++++++++----
>  kernel/locking/rtmutex_common.h |  1 +
>  3 files changed, 87 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/futex.c b/kernel/futex.c
> index 76ed5921117a..29ac5b64e7c7 100644
> --- a/kernel/futex.c
> +++ b/kernel/futex.c
> @@ -2294,21 +2294,17 @@ static void unqueue_me_pi(struct futex_q *q)
>  	spin_unlock(q->lock_ptr);
>  }
>  
> -/*
> - * Fixup the pi_state owner with the new owner.
> - *
> - * Must be called with hash bucket lock held and mm->sem held for non
> - * private futexes.
> - */
>  static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
> -				struct task_struct *newowner)
> +				struct task_struct *argowner)
>  {
> -	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
>  	struct futex_pi_state *pi_state = q->pi_state;
>  	u32 uval, uninitialized_var(curval), newval;
> -	struct task_struct *oldowner;
> +	struct task_struct *oldowner, *newowner;
> +	u32 newtid;
>  	int ret;
>  
> +	lockdep_assert_held(q->lock_ptr);
> +
>  	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
>  
>  	oldowner = pi_state->owner;
> @@ -2317,11 +2313,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
>  		newtid |= FUTEX_OWNER_DIED;
>  
>  	/*
> -	 * We are here either because we stole the rtmutex from the
> -	 * previous highest priority waiter or we are the highest priority
> -	 * waiter but have failed to get the rtmutex the first time.
> +	 * We are here because either:
> +	 *
> +	 *  - we stole the lock and pi_state->owner needs updating to reflect
> +	 *    that (@argowner == current),
>  	 *
> -	 * We have to replace the newowner TID in the user space variable.
> +	 * or:
> +	 *
> +	 *  - someone stole our lock and we need to fix things to point to the
> +	 *    new owner (@argowner == NULL).
> +	 *
> +	 * Either way, we have to replace the TID in the user space variable.
>  	 * This must be atomic as we have to preserve the owner died bit here.
>  	 *
>  	 * Note: We write the user space value _before_ changing the pi_state
> @@ -2334,6 +2336,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
>  	 * in the PID check in lookup_pi_state.
>  	 */
>  retry:
> +	if (!argowner) {
> +		if (oldowner != current) {
> +			/*
> +			 * We raced against a concurrent self; things are
> +			 * already fixed up. Nothing to do.
> +			 */
> +			ret = 0;
> +			goto out_unlock;
> +		}
> +
> +		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
> +			/* We got the lock after all, nothing to fix. */
> +			ret = 0;
> +			goto out_unlock;
> +		}
> +
> +		/*
> +		 * Since we just failed the trylock; there must be an owner.
> +		 */
> +		newowner = rt_mutex_owner(&pi_state->pi_mutex);
> +		BUG_ON(!newowner);
> +	} else {
> +		WARN_ON_ONCE(argowner != current);
> +		if (oldowner == current) {
> +			/*
> +			 * We raced against a concurrent self; things are
> +			 * already fixed up. Nothing to do.
> +			 */
> +			ret = 0;
> +			goto out_unlock;
> +		}
> +		newowner = argowner;
> +	}
> +
> +	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
> +
>  	if (get_futex_value_locked(&uval, uaddr))
>  		goto handle_fault;
>  
> @@ -2434,15 +2472,28 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
>  		 * Got the lock. We might not be the anticipated owner if we
>  		 * did a lock-steal - fix up the PI-state in that case:
>  		 *
> -		 * We can safely read pi_state->owner without holding wait_lock
> -		 * because we now own the rt_mutex, only the owner will attempt
> -		 * to change it.
> +		 * Speculative pi_state->owner read (we don't hold wait_lock);
> +		 * since we own the lock pi_state->owner == current is the
> +		 * stable state, anything else needs more attention.
>  		 */
>  		if (q->pi_state->owner != current)
>  			ret = fixup_pi_state_owner(uaddr, q, current);
>  		goto out;
>  	}
>  
> +	/*
> +	 * If we didn't get the lock; check if anybody stole it from us. In
> +	 * that case, we need to fix up the uval to point to them instead of
> +	 * us, otherwise bad things happen. [10]
> +	 *
> +	 * Another speculative read; pi_state->owner == current is unstable
> +	 * but needs our attention.
> +	 */
> +	if (q->pi_state->owner == current) {
> +		ret = fixup_pi_state_owner(uaddr, q, NULL);
> +		goto out;
> +	}
> +
>  	/*
>  	 * Paranoia check. If we did not take the lock, then we should not be
>  	 * the owner of the rt_mutex.
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 6f3dba6e4e9e..65cc0cb984e6 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
>  	return ret;
>  }
>  
> +static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
> +{
> +	int ret = try_to_take_rt_mutex(lock, current, NULL);
> +
> +	/*
> +	 * try_to_take_rt_mutex() sets the lock waiters bit
> +	 * unconditionally. Clean this up.
> +	 */
> +	fixup_rt_mutex_waiters(lock);
> +
> +	return ret;
> +}
> +
>  /*
>   * Slow path try-lock function:
>   */
> @@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
>  	 */
>  	raw_spin_lock_irqsave(&lock->wait_lock, flags);
>  
> -	ret = try_to_take_rt_mutex(lock, current, NULL);
> -
> -	/*
> -	 * try_to_take_rt_mutex() sets the lock waiters bit
> -	 * unconditionally. Clean this up.
> -	 */
> -	fixup_rt_mutex_waiters(lock);
> +	ret = __rt_mutex_slowtrylock(lock);
>  
>  	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
>  
> @@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
>  	return rt_mutex_slowtrylock(lock);
>  }
>  
> +int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
> +{
> +	return __rt_mutex_slowtrylock(lock);
> +}
> +
>  /**
>   * rt_mutex_timed_lock - lock a rt_mutex interruptible
>   *			the timeout structure is provided
> diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
> index 124e98ca0b17..68686b3ec3c1 100644
> --- a/kernel/locking/rtmutex_common.h
> +++ b/kernel/locking/rtmutex_common.h
> @@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
>  				 struct rt_mutex_waiter *waiter);
>  
>  extern int rt_mutex_futex_trylock(struct rt_mutex *l);
> +extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
>  
>  extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
>  extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,

  reply	other threads:[~2017-12-07 14:58 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-29 17:56 PI futexes + lock stealing woes Julia Cartwright
2017-12-01 20:11 ` Darren Hart
2017-12-01 21:49   ` Julia Cartwright
2017-12-02  0:32     ` Darren Hart
2017-12-06 23:46 ` Peter Zijlstra
2017-12-07  2:09   ` Gratian Crisan
2017-12-07 10:45     ` Peter Zijlstra
2017-12-07 14:26       ` Peter Zijlstra
2017-12-07 14:57         ` Gratian Crisan [this message]
2017-12-07 19:50           ` Julia Cartwright
2017-12-07 23:02             ` Gratian Crisan
2017-12-08 12:49               ` [PATCH] futex: Avoid violating the 10th rule of futex Peter Zijlstra
2017-12-08 16:04                 ` Gratian Crisan
2018-01-08 21:09                 ` Julia Cartwright
2018-01-14 18:06                 ` [tip:locking/urgent] " tip-bot for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87mv2ur3ew.fsf@kerf.amer.corp.natinst.com \
    --to=gratian.crisan@ni.com \
    --cc=dvhart@infradead.org \
    --cc=julia@ni.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.