From 161e545523a7eb4c42c145c04e9a5a15903ba3d9 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 7 Jul 2020 20:46:51 -0400
Subject: [PATCH 1/9] locking/pvqspinlock: Code relocation and extraction

Move pv_kick_node() and the unlock functions further up in the file, and
extract the hash-and-set-_Q_SLOW_VAL code from pv_wait_head_or_lock() into
a new helper, pv_hash_lock().

There is no functional change.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 kernel/locking/qspinlock_paravirt.h | 302 ++++++++++++++--------------
 1 file changed, 156 insertions(+), 146 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index e84d21aa0722..8eec58320b85 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -55,6 +55,7 @@ struct pv_node {
 
 /*
  * Hybrid PV queued/unfair lock
+ * ----------------------------
  *
  * By replacing the regular queued_spin_trylock() with the function below,
  * it will be called once when a lock waiter enter the PV slowpath before
@@ -259,6 +260,156 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
 	BUG();
 }
+/*
+ * Insert lock into hash and set _Q_SLOW_VAL.
+ * Return true if lock acquired.
+ */
+static inline bool pv_hash_lock(struct qspinlock *lock, struct pv_node *node)
+{
+	struct qspinlock **lp = pv_hash(lock, node);
+
+	/*
+	 * We must hash before setting _Q_SLOW_VAL, such that
+	 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
+	 * we'll be sure to be able to observe our hash entry.
+	 *
+	 *   [S] <hash>                 [Rmw] l->locked == _Q_SLOW_VAL
+	 *       MB                           RMB
+	 * [RmW] l->locked = _Q_SLOW_VAL  [L] <unhash>
+	 *
+	 * Matches the smp_rmb() in __pv_queued_spin_unlock().
+	 */
+	if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
+		/*
+		 * The lock was free and now we own the lock.
+		 * Change the lock value back to _Q_LOCKED_VAL
+		 * and unhash the table.
+		 */
+		WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+		WRITE_ONCE(*lp, NULL);
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Called after setting next->locked = 1 when we're the lock owner.
+ *
+ * Instead of waking the waiters stuck in pv_wait_node() advance their state
+ * such that they're waiting in pv_wait_head_or_lock(), this avoids a
+ * wake/sleep cycle.
+ */
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
+{
+	struct pv_node *pn = (struct pv_node *)node;
+
+	/*
+	 * If the vCPU is indeed halted, advance its state to match that of
+	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
+	 * observe its next->locked value and advance itself.
+	 *
+	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
+	 *
+	 * The write to next->locked in arch_mcs_spin_unlock_contended()
+	 * must be ordered before the read of pn->state in the cmpxchg()
+	 * below for the code to work correctly. To guarantee full ordering
+	 * irrespective of the success or failure of the cmpxchg(),
+	 * a relaxed version with explicit barrier is used. The control
+	 * dependency will order the reading of pn->state before any
+	 * subsequent writes.
+	 */
+	smp_mb__before_atomic();
+	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
+	    != vcpu_halted)
+		return;
+
+	/*
+	 * Put the lock into the hash table and set the _Q_SLOW_VAL.
+	 *
+	 * As this is the same vCPU that will check the _Q_SLOW_VAL value and
+	 * the hash table later on at unlock time, no atomic instruction is
+	 * needed.
+	 */
+	WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
+	(void)pv_hash(lock, pn);
+}
+
+/*
+ * PV versions of the unlock fastpath and slowpath functions to be used
+ * instead of queued_spin_unlock().
+ */
+__visible void
+__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
+{
+	struct pv_node *node;
+
+	if (unlikely(locked != _Q_SLOW_VAL)) {
+		WARN(!debug_locks_silent,
+		     "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
+		     (unsigned long)lock, atomic_read(&lock->val));
+		return;
+	}
+
+	/*
+	 * A failed cmpxchg doesn't provide any memory-ordering guarantees,
+	 * so we need a barrier to order the read of the node data in
+	 * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
+	 *
+	 * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
+	 */
+	smp_rmb();
+
+	/*
+	 * Since the above failed to release, this must be the SLOW path.
+	 * Therefore start by looking up the blocked node and unhashing it.
+	 */
+	node = pv_unhash(lock);
+
+	/*
+	 * Now that we have a reference to the (likely) blocked pv_node,
+	 * release the lock.
+	 */
+	smp_store_release(&lock->locked, 0);
+
+	/*
+	 * At this point the memory pointed at by lock can be freed/reused,
+	 * however we can still use the pv_node to kick the CPU.
+	 * The other vCPU may not really be halted, but kicking an active
+	 * vCPU is harmless other than the additional latency in completing
+	 * the unlock.
+	 */
+	lockevent_inc(pv_kick_unlock);
+	pv_kick(node->cpu);
+}
+
+/*
+ * Include the architecture specific callee-save thunk of the
+ * __pv_queued_spin_unlock(). This thunk is put together with
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, architecture specific version of __pv_queued_spin_unlock()
+ * can be defined.
+ */
+#include <asm/qspinlock_paravirt.h>
+
+#ifndef __pv_queued_spin_unlock
+__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+{
+	u8 locked;
+
+	/*
+	 * We must not unlock if SLOW, because in that case we must first
+	 * unhash. Otherwise it would be possible to have multiple @lock
+	 * entries, which would be BAD.
+	 */
+	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
+	if (likely(locked == _Q_LOCKED_VAL))
+		return;
+
+	__pv_queued_spin_unlock_slowpath(lock, locked);
+}
+#endif /* __pv_queued_spin_unlock */
+
 /*
  * Return true if when it is time to check the previous node which is not
  * in a running state.
  */
@@ -350,48 +501,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 	 */
 }
 
-/*
- * Called after setting next->locked = 1 when we're the lock owner.
- *
- * Instead of waking the waiters stuck in pv_wait_node() advance their state
- * such that they're waiting in pv_wait_head_or_lock(), this avoids a
- * wake/sleep cycle.
- */
-static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
-{
-	struct pv_node *pn = (struct pv_node *)node;
-
-	/*
-	 * If the vCPU is indeed halted, advance its state to match that of
-	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
-	 * observe its next->locked value and advance itself.
-	 *
-	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
-	 *
-	 * The write to next->locked in arch_mcs_spin_unlock_contended()
-	 * must be ordered before the read of pn->state in the cmpxchg()
-	 * below for the code to work correctly. To guarantee full ordering
-	 * irrespective of the success or failure of the cmpxchg(),
-	 * a relaxed version with explicit barrier is used. The control
-	 * dependency will order the reading of pn->state before any
-	 * subsequent writes.
- */
-	smp_mb__before_atomic();
-	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
-	    != vcpu_halted)
-		return;
-
-	/*
-	 * Put the lock into the hash table and set the _Q_SLOW_VAL.
-	 *
-	 * As this is the same vCPU that will check the _Q_SLOW_VAL value and
-	 * the hash table later on at unlock time, no atomic instruction is
-	 * needed.
-	 */
-	WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
-	(void)pv_hash(lock, pn);
-}
-
 /*
  * Wait for l->locked to become clear and acquire the lock;
  * halt the vcpu after a short spin.
@@ -403,16 +512,13 @@ static u32
 pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
-	struct qspinlock **lp = NULL;
 	int waitcnt = 0;
 	int loop;
-
 	/*
-	 * If pv_kick_node() already advanced our state, we don't need to
+	 * If pv_kick_node() had already advanced our state, we don't need to
 	 * insert ourselves into the hash table anymore.
 	 */
-	if (READ_ONCE(pn->state) == vcpu_hashed)
-		lp = (struct qspinlock **)1;
+	bool hashed = (READ_ONCE(pn->state) == vcpu_hashed);
 
 	/*
 	 * Tracking # of slowpath locking operations
@@ -439,30 +545,10 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 
 		clear_pending(lock);
 
-		if (!lp) { /* ONCE */
-			lp = pv_hash(lock, pn);
-
-			/*
-			 * We must hash before setting _Q_SLOW_VAL, such that
-			 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
-			 * we'll be sure to be able to observe our hash entry.
-			 *
-			 *   [S] <hash>                 [Rmw] l->locked == _Q_SLOW_VAL
-			 *       MB                           RMB
-			 * [RmW] l->locked = _Q_SLOW_VAL  [L] <unhash>
-			 *
-			 * Matches the smp_rmb() in __pv_queued_spin_unlock().
-			 */
-			if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
-				/*
-				 * The lock was free and now we own the lock.
-				 * Change the lock value back to _Q_LOCKED_VAL
-				 * and unhash the table.
-				 */
-				WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
-				WRITE_ONCE(*lp, NULL);
+		if (!hashed) { /* ONCE */
+			if (pv_hash_lock(lock, pn))
 				goto gotlock;
-			}
+			hashed = true;
 		}
 		WRITE_ONCE(pn->state, vcpu_hashed);
 		lockevent_inc(pv_wait_head);
@@ -484,79 +570,3 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 gotlock:
 	return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL);
 }
-
-/*
- * PV versions of the unlock fastpath and slowpath functions to be used
- * instead of queued_spin_unlock().
- */
-__visible void
-__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
-{
-	struct pv_node *node;
-
-	if (unlikely(locked != _Q_SLOW_VAL)) {
-		WARN(!debug_locks_silent,
-		     "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
-		     (unsigned long)lock, atomic_read(&lock->val));
-		return;
-	}
-
-	/*
-	 * A failed cmpxchg doesn't provide any memory-ordering guarantees,
-	 * so we need a barrier to order the read of the node data in
-	 * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
-	 *
-	 * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
-	 */
-	smp_rmb();
-
-	/*
-	 * Since the above failed to release, this must be the SLOW path.
-	 * Therefore start by looking up the blocked node and unhashing it.
-	 */
-	node = pv_unhash(lock);
-
-	/*
-	 * Now that we have a reference to the (likely) blocked pv_node,
-	 * release the lock.
-	 */
-	smp_store_release(&lock->locked, 0);
-
-	/*
-	 * At this point the memory pointed at by lock can be freed/reused,
-	 * however we can still use the pv_node to kick the CPU.
-	 * The other vCPU may not really be halted, but kicking an active
-	 * vCPU is harmless other than the additional latency in completing
-	 * the unlock.
- */
-	lockevent_inc(pv_kick_unlock);
-	pv_kick(node->cpu);
-}
-
-/*
- * Include the architecture specific callee-save thunk of the
- * __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
- * function close to each other sharing consecutive instruction cachelines.
- * Alternatively, architecture specific version of __pv_queued_spin_unlock()
- * can be defined.
- */
-#include <asm/qspinlock_paravirt.h>
-
-#ifndef __pv_queued_spin_unlock
-__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
-{
-	u8 locked;
-
-	/*
-	 * We must not unlock if SLOW, because in that case we must first
-	 * unhash. Otherwise it would be possible to have multiple @lock
-	 * entries, which would be BAD.
-	 */
-	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
-	if (likely(locked == _Q_LOCKED_VAL))
-		return;
-
-	__pv_queued_spin_unlock_slowpath(lock, locked);
-}
-#endif /* __pv_queued_spin_unlock */
-- 
2.18.1
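
For readers not familiar with the hash-then-set-_Q_SLOW_VAL handshake that
pv_hash_lock() and __pv_queued_spin_unlock_slowpath() implement, the
stand-alone C11 sketch below models the same publish-then-flag protocol in
user space. It is not part of the patch and not kernel code: every name in
it (toy_lock, toy_node, toy_hash_entry, the TOY_* values) is invented for
illustration, and the C11 fences stand in for the kernel's xchg()/smp_rmb()
pairing only as an approximation.

/*
 * Minimal user-space model of the pv_hash_lock() publication protocol.
 * Waiter: store the hash entry, then flag the lock SLOW with a full-barrier
 * RMW.  Unlocker: on a failed release-cmpxchg, issue an acquire fence before
 * reading the hash entry, mirroring the smp_rmb() before pv_unhash().
 */
#include <stdatomic.h>
#include <stdio.h>

#define TOY_UNLOCKED 0
#define TOY_LOCKED   1
#define TOY_SLOW     3

struct toy_lock { _Atomic unsigned char locked; };
struct toy_node { int cpu; };

/* Single-slot stand-in for the kernel's lock hash table. */
static _Atomic(struct toy_node *) toy_hash_entry;

/* Waiter side: publish the node first, then mark the lock SLOW. */
static int toy_hash_lock(struct toy_lock *lock, struct toy_node *node)
{
	atomic_store_explicit(&toy_hash_entry, node, memory_order_relaxed);
	/* seq_cst RMW: orders the hash store before the SLOW store. */
	return atomic_exchange(&lock->locked, TOY_SLOW) == TOY_UNLOCKED;
}

/* Owner side: fast-path release, falling back to the hashed slow path. */
static void toy_unlock(struct toy_lock *lock)
{
	unsigned char old = TOY_LOCKED;

	if (atomic_compare_exchange_strong_explicit(&lock->locked, &old,
			TOY_UNLOCKED, memory_order_release,
			memory_order_relaxed))
		return;		/* fast path: nobody hashed themselves */

	/* A failed cmpxchg gives no ordering; this acquire fence pairs with
	 * the waiter's exchange so the hash entry is guaranteed visible. */
	atomic_thread_fence(memory_order_acquire);
	struct toy_node *node =
		atomic_load_explicit(&toy_hash_entry, memory_order_relaxed);

	atomic_store_explicit(&lock->locked, TOY_UNLOCKED,
			      memory_order_release);
	printf("kick vCPU %d\n", node->cpu);	/* stands in for pv_kick() */
}

int main(void)
{
	struct toy_lock lock = { TOY_LOCKED };	/* owner holds the lock */
	struct toy_node waiter = { .cpu = 1 };

	toy_hash_lock(&lock, &waiter);	/* waiter hashes itself, sets SLOW */
	toy_unlock(&lock);		/* owner takes the slow unlock path */
	return 0;
}

Built with any C11 compiler (e.g. cc -std=c11), the sequential demo prints
"kick vCPU 1". The point of the sketch is the pairing: the waiter's
full-barrier exchange orders the hash publication before the SLOW flag, and
the unlocker's acquire fence after observing SLOW orders the flag read before
the hash lookup, which is exactly the [S]/[Rmw] diagram in pv_hash_lock().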