On Fri, May 30, 2014 at 11:43:55AM -0400, Waiman Long wrote: > Enabling this configuration feature causes a slight decrease in the > performance of an uncontended lock-unlock operation by about 1-2% > mainly due to the use of a static key. However, uncontended lock-unlock > operations are really just a tiny percentage of a real workload. So > there should be no noticeable change in application performance. No, entirely unacceptable. > +#ifdef CONFIG_VIRT_UNFAIR_LOCKS > +/** > + * queue_spin_trylock_unfair - try to acquire the queue spinlock unfairly > + * @lock : Pointer to queue spinlock structure > + * Return: 1 if lock acquired, 0 if failed > + */ > +static __always_inline int queue_spin_trylock_unfair(struct qspinlock *lock) > +{ > + union arch_qspinlock *qlock = (union arch_qspinlock *)lock; > + > + if (!qlock->locked && (cmpxchg(&qlock->locked, 0, _Q_LOCKED_VAL) == 0)) > + return 1; > + return 0; > +} > + > +/** > + * queue_spin_lock_unfair - acquire a queue spinlock unfairly > + * @lock: Pointer to queue spinlock structure > + */ > +static __always_inline void queue_spin_lock_unfair(struct qspinlock *lock) > +{ > + union arch_qspinlock *qlock = (union arch_qspinlock *)lock; > + > + if (likely(cmpxchg(&qlock->locked, 0, _Q_LOCKED_VAL) == 0)) > + return; > + /* > + * Since the lock is now unfair, we should not activate the 2-task > + * pending bit spinning code path which disallows lock stealing. > + */ > + queue_spin_lock_slowpath(lock, -1); > +} Why is this needed? > +/* > + * Redefine arch_spin_lock and arch_spin_trylock as inline functions that will > + * jump to the unfair versions if the static key virt_unfairlocks_enabled > + * is true. 
> + */ > +#undef arch_spin_lock > +#undef arch_spin_trylock > +#undef arch_spin_lock_flags > + > +/** > + * arch_spin_lock - acquire a queue spinlock > + * @lock: Pointer to queue spinlock structure > + */ > +static inline void arch_spin_lock(struct qspinlock *lock) > +{ > + if (static_key_false(&virt_unfairlocks_enabled)) > + queue_spin_lock_unfair(lock); > + else > + queue_spin_lock(lock); > +} > + > +/** > + * arch_spin_trylock - try to acquire the queue spinlock > + * @lock : Pointer to queue spinlock structure > + * Return: 1 if lock acquired, 0 if failed > + */ > +static inline int arch_spin_trylock(struct qspinlock *lock) > +{ > + if (static_key_false(&virt_unfairlocks_enabled)) > + return queue_spin_trylock_unfair(lock); > + else > + return queue_spin_trylock(lock); > +} So I really don't see the point of all this. Why do you need special {try,}lock paths for this case? Are you worried about the upper 24 bits? > diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c > index ae1b19d..3723c83 100644 > --- a/kernel/locking/qspinlock.c > +++ b/kernel/locking/qspinlock.c > @@ -217,6 +217,14 @@ static __always_inline int try_set_locked(struct qspinlock *lock) > { > struct __qspinlock *l = (void *)lock; > > +#ifdef CONFIG_VIRT_UNFAIR_LOCKS > + /* > + * Need to use atomic operation to grab the lock when lock stealing > + * can happen. > + */ > + if (static_key_false(&virt_unfairlocks_enabled)) > + return cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0; > +#endif > barrier(); > ACCESS_ONCE(l->locked) = _Q_LOCKED_VAL; > barrier(); Why? If we have a simple test-and-set lock like below, we'll never get here at all. 
> @@ -252,6 +260,18 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val) > > BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); > > +#ifdef CONFIG_VIRT_UNFAIR_LOCKS > + /* > + * A simple test and set unfair lock > + */ > + if (static_key_false(&virt_unfairlocks_enabled)) { > + cpu_relax(); /* Relax after a failed lock attempt */ Meh, I don't think anybody can tell the difference if you put that in or not, therefore don't. > + while (!queue_spin_trylock(lock)) > + cpu_relax(); > + return; > + } > +#endif /* CONFIG_VIRT_UNFAIR_LOCKS */ If you're really worried about those upper 24 bits, you can always clear them when you get here.