On Fri, May 30, 2014 at 11:43:55AM -0400, Waiman Long wrote:
> Enabling this configuration feature causes a slight decrease in the
> performance of an uncontended lock-unlock operation, by about 1-2%,
> mainly due to the use of a static key. However, uncontended lock-unlock
> operations are really just a tiny percentage of a real workload, so
> there should be no noticeable change in application performance.

No, entirely unacceptable.

> +#ifdef CONFIG_VIRT_UNFAIR_LOCKS
> +/**
> + * queue_spin_trylock_unfair - try to acquire the queue spinlock unfairly
> + * @lock : Pointer to queue spinlock structure
> + * Return: 1 if lock acquired, 0 if failed
> + */
> +static __always_inline int queue_spin_trylock_unfair(struct qspinlock *lock)
> +{
> +	union arch_qspinlock *qlock = (union arch_qspinlock *)lock;
> +
> +	if (!qlock->locked && (cmpxchg(&qlock->locked, 0, _Q_LOCKED_VAL) == 0))
> +		return 1;
> +	return 0;
> +}
> +
> +/**
> + * queue_spin_lock_unfair - acquire a queue spinlock unfairly
> + * @lock: Pointer to queue spinlock structure
> + */
> +static __always_inline void queue_spin_lock_unfair(struct qspinlock *lock)
> +{
> +	union arch_qspinlock *qlock = (union arch_qspinlock *)lock;
> +
> +	if (likely(cmpxchg(&qlock->locked, 0, _Q_LOCKED_VAL) == 0))
> +		return;
> +	/*
> +	 * Since the lock is now unfair, we should not activate the 2-task
> +	 * pending bit spinning code path which disallows lock stealing.
> +	 */
> +	queue_spin_lock_slowpath(lock, -1);
> +}

Why is this needed?

> +/*
> + * Redefine arch_spin_lock and arch_spin_trylock as inline functions that will
> + * jump to the unfair versions if the static key virt_unfairlocks_enabled
> + * is true.
> + */
> +#undef arch_spin_lock
> +#undef arch_spin_trylock
> +#undef arch_spin_lock_flags
> +
> +/**
> + * arch_spin_lock - acquire a queue spinlock
> + * @lock: Pointer to queue spinlock structure
> + */
> +static inline void arch_spin_lock(struct qspinlock *lock)
> +{
> +	if (static_key_false(&virt_unfairlocks_enabled))
> +		queue_spin_lock_unfair(lock);
> +	else
> +		queue_spin_lock(lock);
> +}
> +
> +/**
> + * arch_spin_trylock - try to acquire the queue spinlock
> + * @lock : Pointer to queue spinlock structure
> + * Return: 1 if lock acquired, 0 if failed
> + */
> +static inline int arch_spin_trylock(struct qspinlock *lock)
> +{
> +	if (static_key_false(&virt_unfairlocks_enabled))
> +		return queue_spin_trylock_unfair(lock);
> +	else
> +		return queue_spin_trylock(lock);
> +}

So I really don't see the point of all this. Why do you need special
{try,}lock paths for this case? Are you worried about the upper 24 bits?

> diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
> index ae1b19d..3723c83 100644
> --- a/kernel/locking/qspinlock.c
> +++ b/kernel/locking/qspinlock.c
> @@ -217,6 +217,14 @@ static __always_inline int try_set_locked(struct qspinlock *lock)
>  {
>  	struct __qspinlock *l = (void *)lock;
>  
> +#ifdef CONFIG_VIRT_UNFAIR_LOCKS
> +	/*
> +	 * Need to use atomic operation to grab the lock when lock stealing
> +	 * can happen.
> +	 */
> +	if (static_key_false(&virt_unfairlocks_enabled))
> +		return cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0;
> +#endif
>  	barrier();
>  	ACCESS_ONCE(l->locked) = _Q_LOCKED_VAL;
>  	barrier();

Why? If we have a simple test-and-set lock like below, we'll never get
here at all.

> @@ -252,6 +260,18 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
>  
>  	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
>  
> +#ifdef CONFIG_VIRT_UNFAIR_LOCKS
> +	/*
> +	 * A simple test and set unfair lock
> +	 */
> +	if (static_key_false(&virt_unfairlocks_enabled)) {
> +		cpu_relax();	/* Relax after a failed lock attempt */

Meh, I don't think anybody can tell the difference whether you put that
in or not; therefore don't.

> +		while (!queue_spin_trylock(lock))
> +			cpu_relax();
> +		return;
> +	}
> +#endif /* CONFIG_VIRT_UNFAIR_LOCKS */

If you're really worried about those upper 24 bits, you can always clear
them when you get here.