* [PATCH-tip 1/2] locking/rwsem: Clarify usage of owner's nonspinnable bit
2019-04-15 20:58 [PATCH-tip 0/2] locking/rwsem: Rwsem rearchitecture part 2 follow-up patches Waiman Long
@ 2019-04-15 20:58 ` Waiman Long
2019-04-15 20:58 ` [PATCH-tip 2/2] locking/rwsem: Adaptive disabling of reader optimistic spinning Waiman Long
2019-04-16 13:10 ` [PATCH-tip 0/2] locking/rwsem: Rwsem rearchitecture part 2 follow-up patches Peter Zijlstra
2 siblings, 0 replies; 13+ messages in thread
From: Waiman Long @ 2019-04-15 20:58 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner
Cc: linux-kernel, x86, Davidlohr Bueso, Linus Torvalds, Tim Chen,
huang ying, Waiman Long
Bit 1 of sem->owner was previously used to designate an anonymous
owner - reader or anonymous writer. With the reader optimistic spinning
patches, bit 1 is now used to indicate that optimistic spinning should
be disabled. So change RWSEM_ANONYMOUSLY_OWNED to RWSEM_NONSPINNABLE
to clarify its current meaning.
Since we leave sem->owner unchanged when the reader unlocks, the
nonspinnable bit together with count's RWSEM_FLAG_WAITERS bit will act
like the RWSEM_FLAG_HANDOFF bit. This is too draconian for its purpose.
For fairness, we want a writer to acquire the lock after the readers
hold the lock for a relatively long time. In order to give preference
to writers under such a circumstance, the single RWSEM_NONSPINNABLE
bit is now split into two - one for reader and one for writer. When
optimistic spinning is disabled, both bits will be set. When the reader
count drops to 0, the writer nonspinnable bit will be cleared to
allow writers to spin on the lock, but not the readers. When a writer
acquires the lock, it will write its own task structure pointer into
sem->owner and clear the reader nonspinnable bit in the process.
Signed-off-by: Waiman Long <longman@redhat.com>
---
include/linux/rwsem.h | 2 +-
kernel/locking/rwsem.c | 123 ++++++++++++++++++++++++-----------------
2 files changed, 74 insertions(+), 51 deletions(-)
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 148983e21d47..bb76e82398b2 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -50,7 +50,7 @@ struct rw_semaphore {
};
/*
- * Setting bit 1 of the owner field but not bit 0 will indicate
+ * Setting all bits of the owner field except bit 0 will indicate
* that the rwsem is writer-owned with an unknown owner.
*/
#define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 2c8187690c7c..bb75584d99e3 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -31,13 +31,19 @@
#include "lock_events.h"
/*
- * The least significant 2 bits of the owner value has the following
+ * The least significant 3 bits of the owner value have the following
* meanings when set.
- * - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers
- * - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned,
- * i.e. the owner(s) cannot be readily determined. It can be reader
- * owned or the owning writer is indeterminate. Optimistic spinning
- * should be disabled if this flag is set.
+ * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
+ * - Bit 1: RWSEM_RD_NONSPINNABLE - Readers cannot spin on this lock.
+ * - Bit 2: RWSEM_WR_NONSPINNABLE - Writers cannot spin on this lock.
+ *
+ * When the rwsem is either owned by an anonymous writer, or it is
+ * reader-owned, but a spinning writer has timed out, both nonspinnable
+ * bits will be set to disable optimistic spinning by readers and writers.
+ * In the latter case, the last unlocking reader should then check the
+ * writer nonspinnable bit and clear it only to give writers preference
+ * to acquire the lock via optimistic spinning, but not readers. Similar
+ * action is also done in the reader slowpath.
*
* When a writer acquires a rwsem, it puts its task_struct pointer
* into the owner field or the count itself (64-bit only. It should
@@ -47,9 +53,7 @@
* pointer into the owner field with the RWSEM_READER_OWNED bit set.
* On unlock, the owner field will largely be left untouched. So
* for a free or reader-owned rwsem, the owner value may contain
- * information about the last reader that acquires the rwsem. The
- * anonymous bit may also be set to permanently disable optimistic
- * spinning on a reader-own rwsem until a writer comes along.
+ * information about the last reader that acquires the rwsem.
*
* That information may be helpful in debugging cases where the system
* seems to hang on a reader owned rwsem especially if only one reader
@@ -57,7 +61,10 @@
* a rwsem, but the overhead is simply too big.
*/
#define RWSEM_READER_OWNED (1UL << 0)
-#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1)
+#define RWSEM_RD_NONSPINNABLE (1UL << 1)
+#define RWSEM_WR_NONSPINNABLE (1UL << 2)
+#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
+#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
@@ -219,7 +226,7 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem)
#ifdef RWSEM_MERGE_OWNER_TO_COUNT
/*
* Get the owner value from count to have early access to the task structure.
- * Owner from sem->count should includes the RWSEM_ANONYMOUSLY_OWNED bit
+ * Owner from sem->count should include the RWSEM_NONSPINNABLE bits
* from sem->owner.
*/
static inline struct task_struct *rwsem_get_owner(struct rw_semaphore *sem)
@@ -228,12 +235,16 @@ static inline struct task_struct *rwsem_get_owner(struct rw_semaphore *sem)
unsigned long sowner = (unsigned long)READ_ONCE(sem->owner);
return (struct task_struct *) (cowner
- ? cowner | (sowner & RWSEM_ANONYMOUSLY_OWNED) : sowner);
+ ? cowner | (sowner & RWSEM_NONSPINNABLE) : sowner);
}
#else /* !RWSEM_MERGE_OWNER_TO_COUNT */
static inline struct task_struct *rwsem_get_owner(struct rw_semaphore *sem)
{
- return READ_ONCE(sem->owner);
+ unsigned long owner = (unsigned long)READ_ONCE(sem->owner);
+
+ /* Clear all the flag bits for writer */
+ return (struct task_struct *)((owner & RWSEM_READER_OWNED)
+ ? owner : (owner & ~RWSEM_OWNER_FLAGS_MASK));
}
#endif /* RWSEM_MERGE_OWNER_TO_COUNT */
@@ -260,15 +271,17 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
/*
* Return true if the a rwsem waiter can spin on the rwsem's owner
- * and steal the lock, i.e. the lock is not anonymously owned.
+ * and steal the lock.
* N.B. !owner is considered spinnable.
*/
-static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
+static inline bool is_rwsem_owner_spinnable(void *owner, bool wr)
{
- return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
+ unsigned long bit = wr ? RWSEM_WR_NONSPINNABLE : RWSEM_RD_NONSPINNABLE;
+
+ return !((unsigned long)owner & bit);
}
-static inline bool is_rwsem_owner_reader(struct task_struct *owner)
+static inline bool is_rwsem_owner_reader(void *owner)
{
return (unsigned long)owner & RWSEM_READER_OWNED;
}
@@ -276,9 +289,9 @@ static inline bool is_rwsem_owner_reader(struct task_struct *owner)
/*
* Return true if the rwsem is spinnable.
*/
-static inline bool is_rwsem_spinnable(struct rw_semaphore *sem)
+static inline bool is_rwsem_spinnable(struct rw_semaphore *sem, bool wr)
{
- return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
+ return is_rwsem_owner_spinnable(READ_ONCE(sem->owner), wr);
}
/*
@@ -298,14 +311,6 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
return (unsigned long)sem->owner & RWSEM_READER_OWNED;
}
-/*
- * Return true if rwsem is owned by an anonymous writer or readers.
- */
-static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
-{
- return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
-}
-
#ifdef CONFIG_DEBUG_RWSEMS
/*
* With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
@@ -315,12 +320,11 @@ static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
*/
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
- unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
- | RWSEM_ANONYMOUSLY_OWNED;
+ unsigned long owner = (unsigned long)READ_ONCE(sem->owner);
- if (READ_ONCE(sem->owner) == (struct task_struct *)val)
- cmpxchg_relaxed((unsigned long *)&sem->owner, val,
- RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED);
+ if ((owner & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current)
+ cmpxchg_relaxed((unsigned long *)&sem->owner, owner,
+ owner & RWSEM_OWNER_FLAGS_MASK);
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
@@ -329,7 +333,7 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
#endif
/*
- * Set the RWSEM_ANONYMOUSLY_OWNED flag if the RWSEM_READER_OWNED flag
+ * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
* remains set. Otherwise, the operation will be aborted.
*/
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
@@ -337,10 +341,10 @@ static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
long owner = (long)READ_ONCE(sem->owner);
while (is_rwsem_owner_reader((struct task_struct *)owner)) {
- if (!is_rwsem_owner_spinnable((struct task_struct *)owner))
+ if (!is_rwsem_owner_spinnable((void *)owner, true))
break;
owner = cmpxchg((long *)&sem->owner, owner,
- owner | RWSEM_ANONYMOUSLY_OWNED);
+ owner | RWSEM_NONSPINNABLE);
}
}
@@ -649,12 +653,12 @@ static inline bool owner_on_cpu(struct task_struct *owner)
return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
}
-static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem, bool wr)
{
struct task_struct *owner;
bool ret = true;
- BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));
+ BUILD_BUG_ON(is_rwsem_owner_spinnable(RWSEM_OWNER_UNKNOWN, true));
if (need_resched()) {
lockevent_inc(rwsem_opt_fail);
@@ -665,7 +669,7 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
rcu_read_lock();
owner = rwsem_get_owner(sem);
if (owner) {
- ret = is_rwsem_owner_spinnable(owner) &&
+ ret = is_rwsem_owner_spinnable(owner, wr) &&
(is_rwsem_owner_reader(owner) || owner_on_cpu(owner));
}
rcu_read_unlock();
@@ -693,12 +697,13 @@ enum owner_state {
};
#define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
-static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem)
+static noinline enum owner_state
+rwsem_spin_on_owner(struct rw_semaphore *sem, bool wr)
{
struct task_struct *owner = rwsem_get_owner(sem);
long count;
- if (!is_rwsem_owner_spinnable(owner))
+ if (!is_rwsem_owner_spinnable(owner, wr))
return OWNER_NONSPINNABLE;
rcu_read_lock();
@@ -736,7 +741,7 @@ static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem)
* spinning except when here is no active locks and the handoff bit
* is set. In this case, we have to stop spinning.
*/
- if (!is_rwsem_owner_spinnable(owner))
+ if (!is_rwsem_owner_spinnable(owner, wr))
return OWNER_NONSPINNABLE;
if (owner && !is_rwsem_owner_reader(owner))
return OWNER_WRITER;
@@ -802,7 +807,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, const long wlock)
* 2) readers own the lock and spinning count has reached 0.
*/
for (;;) {
- enum owner_state owner_state = rwsem_spin_on_owner(sem);
+ enum owner_state owner_state = rwsem_spin_on_owner(sem, wlock);
if (!(owner_state & OWNER_SPINNABLE))
break;
@@ -825,7 +830,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, const long wlock)
* state changes from non-reader to reader.
*/
if (prev_owner_state != OWNER_READER) {
- if (!is_rwsem_spinnable(sem))
+ if (!is_rwsem_spinnable(sem, wlock))
break;
rspin_threshold = rwsem_rspin_threshold(sem);
loop = 0;
@@ -884,7 +889,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, const long wlock)
return taken;
}
#else
-static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem, bool wr)
{
return false;
}
@@ -939,7 +944,7 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state, long count)
goto queue;
}
- if (!rwsem_can_spin_on_owner(sem))
+ if (!rwsem_can_spin_on_owner(sem, false))
goto queue;
/*
@@ -997,13 +1002,21 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state, long count)
/*
* If there are no active locks, wake the front queued process(es).
+ * Also clear the owner's RWSEM_WR_NONSPINNABLE bit if set.
*
* If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers !
*/
- if (!RWSEM_COUNT_LOCKED(count) ||
- (!(count & RWSEM_WRITER_MASK) && (adjustment & RWSEM_FLAG_WAITERS)))
+ if (!RWSEM_COUNT_LOCKED(count)) {
+ /* Clear RWSEM_WR_NONSPINNABLE bit if set */
+ if (!is_rwsem_spinnable(sem, true))
+ atomic_long_andnot(RWSEM_WR_NONSPINNABLE,
+ (atomic_long_t *)&sem->owner);
+ __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+ } else if (!(count & RWSEM_WRITER_MASK) &&
+ (adjustment & RWSEM_FLAG_WAITERS)) {
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+ }
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
@@ -1064,7 +1077,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
const long wlock = RWSEM_WRITER_LOCKED;
/* do optimistic spinning and steal lock if possible */
- if (rwsem_can_spin_on_owner(sem) &&
+ if (rwsem_can_spin_on_owner(sem, true) &&
rwsem_optimistic_spin(sem, wlock))
return sem;
@@ -1346,8 +1359,13 @@ inline void __up_read(struct rw_semaphore *sem)
rwsem_clear_reader_owned(sem);
tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS))
- == RWSEM_FLAG_WAITERS))
+ == RWSEM_FLAG_WAITERS)) {
+ /* Clear RWSEM_WR_NONSPINNABLE bit if set */
+ if (!is_rwsem_spinnable(sem, true))
+ atomic_long_andnot(RWSEM_WR_NONSPINNABLE,
+ (atomic_long_t *)&sem->owner);
rwsem_wake(sem, tmp);
+ }
}
/*
@@ -1357,7 +1375,12 @@ static inline void __up_write(struct rw_semaphore *sem)
{
long tmp;
- DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
+ /*
+ * sem->owner may differ from current if the ownership is transferred
+ * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
+ */
+ DEBUG_RWSEMS_WARN_ON((sem->owner != current) &&
+ !((long)sem->owner & RWSEM_NONSPINNABLE), sem);
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_and_release(~RWSEM_WRITER_MASK, &sem->count);
if (unlikely(tmp & RWSEM_FLAG_WAITERS))
--
2.18.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH-tip 2/2] locking/rwsem: Adaptive disabling of reader optimistic spinning
2019-04-15 20:58 [PATCH-tip 0/2] locking/rwsem: Rwsem rearchitecture part 2 follow-up patches Waiman Long
2019-04-15 20:58 ` [PATCH-tip 1/2] locking/rwsem: Clarify usage of owner's nonspinnable bit Waiman Long
@ 2019-04-15 20:58 ` Waiman Long
2019-04-16 13:10 ` [PATCH-tip 0/2] locking/rwsem: Rwsem rearchitecture part 2 follow-up patches Peter Zijlstra
2 siblings, 0 replies; 13+ messages in thread
From: Waiman Long @ 2019-04-15 20:58 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner
Cc: linux-kernel, x86, Davidlohr Bueso, Linus Torvalds, Tim Chen,
huang ying, Waiman Long
Reader optimistic spinning is helpful when the reader critical section
is short and there aren't that many readers around. It makes readers
relatively more preferred than writers. When a writer times out spinning
on a reader-owned lock and sets the nonspinnable bits, there are two main
reasons for that.
1) The reader critical section is long, perhaps the task sleeps after
acquiring the read lock.
2) There are just too many readers contending the lock causing it to
take a while to service all of them.
In the former case, long reader critical section will impede the progress
of writers which is usually more important for system performance.
In the latter case, reader optimistic spinning tends to make the reader
groups that contain readers that acquire the lock together smaller
leading to more of them. That may hurt performance in some cases. In
other words, the setting of nonspinnable bits indicates that reader
optimistic spinning may not be helpful for those workloads that cause it.
Therefore, any writers that had observed the setting of the writer
nonspinnable bit for a given rwsem after they fail to acquire the lock
via optimistic spinning will set the reader nonspinnable bit once they
acquire the write lock. This is to discourage reader optimistic spinning
on that particular rwsem and make writers more preferred. This adaptive
disabling of reader optimistic spinning will alleviate some of the
negative side effect of this feature.
On a 2-socket 40-core 80-thread Skylake system, the page_fault1 test of
the will-it-scale benchmark was run with various numbers of threads. The
number of operations done before and after the patch were:
Threads Before patch After patch % change
------- ------------ ----------- --------
20 5409075 5436456 +0.5%
40 7174080 7903845 +10.2%
60 6749707 7009784 +3.9%
80 7071334 7353806 +4.0%
This doesn't recover all the lost performance, but is close to half. Given
the fact that reader optimistic spinning does benefit some workloads, this
is a good compromise.
Using the rwsem locking microbenchmark with very short critical section,
this patch also helps performance at high contention level as shown
by the locking rates (kops/s) below with equal numbers of readers and
writers before and after this patch:
# of Threads Pre-patch Post-patch
------------ --------- ----------
2 4,472 4,839
4 4,623 4,143
8 4,764 4,126
16 4,678 3,873
32 2,847 3,263
64 2,478 3,121
80 2,222 3,104
Signed-off-by: Waiman Long <longman@redhat.com>
---
kernel/locking/lock_events_list.h | 9 ++---
kernel/locking/rwsem.c | 55 +++++++++++++++++++++++++++++--
2 files changed, 57 insertions(+), 7 deletions(-)
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index f3550aa5866a..b0eeb77070dd 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -56,10 +56,11 @@ LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */
LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */
LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */
-LOCK_EVENT(rwsem_opt_rlock) /* # of read locks opt-spin acquired */
-LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */
-LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */
-LOCK_EVENT(rwsem_opt_nospin) /* # of disabled reader opt-spinnings */
+LOCK_EVENT(rwsem_opt_rlock) /* # of opt-acquired read locks */
+LOCK_EVENT(rwsem_opt_wlock) /* # of opt-acquired write locks */
+LOCK_EVENT(rwsem_opt_fail) /* # of failed optspins */
+LOCK_EVENT(rwsem_opt_nospin) /* # of disabled optspins */
+LOCK_EVENT(rwsem_opt_norspin) /* # of disabled reader-only optspins */
LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */
LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */
LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index bb75584d99e3..d50bc7b0315f 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -59,6 +59,34 @@
* seems to hang on a reader owned rwsem especially if only one reader
* is involved. Ideally we would like to track all the readers that own
* a rwsem, but the overhead is simply too big.
+ *
+ * Reader optimistic spinning is helpful when the reader critical section
+ * is short and there aren't that many readers around. It makes readers
+ * relatively more preferred than writers. When a writer times out spinning
+ * on a reader-owned lock and sets the nonspinnable bits, there are two main
+ * reasons for that.
+ *
+ * 1) The reader critical section is long, perhaps the task sleeps after
+ * acquiring the read lock.
+ * 2) There are just too many readers contending the lock causing it to
+ * take a while to service all of them.
+ *
+ * In the former case, long reader critical section will impede the progress
+ * of writers which is usually more important for system performance. In
+ * the latter case, reader optimistic spinning tends to make the reader
+ * groups that contain readers that acquire the lock together smaller
+ * leading to more of them. That may hurt performance in some cases. In
+ * other words, the setting of nonspinnable bits indicates that reader
+ * optimistic spinning may not be helpful for those workloads that cause
+ * it.
+ *
+ * Therefore, any writers that had observed the setting of the writer
+ * nonspinnable bit for a given rwsem after they fail to acquire the lock
+ * via optimistic spinning will set the reader nonspinnable bit once they
+ * acquire the write lock. This is to discourage reader optimistic spinning
+ * on that particular rwsem and make writers more preferred. This adaptive
+ * disabling of reader optimistic spinning will alleviate the negative
+ * side effect of this feature.
*/
#define RWSEM_READER_OWNED (1UL << 0)
#define RWSEM_RD_NONSPINNABLE (1UL << 1)
@@ -1063,6 +1091,15 @@ rwsem_down_read_failed_killable(struct rw_semaphore *sem, long cnt)
return __rwsem_down_read_failed_common(sem, TASK_KILLABLE, cnt);
}
+static inline void rwsem_disable_reader_optspin(struct rw_semaphore *sem,
+ bool disable)
+{
+ if (unlikely(disable)) {
+ *((unsigned long *)&sem->owner) |= RWSEM_RD_NONSPINNABLE;
+ lockevent_inc(rwsem_opt_norspin);
+ }
+}
+
/*
* Wait until we successfully acquire the write lock
*/
@@ -1075,12 +1112,20 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
struct rw_semaphore *ret = sem;
DEFINE_WAKE_Q(wake_q);
const long wlock = RWSEM_WRITER_LOCKED;
+ bool disable_rspin;
/* do optimistic spinning and steal lock if possible */
if (rwsem_can_spin_on_owner(sem, true) &&
rwsem_optimistic_spin(sem, wlock))
return sem;
+ /*
+ * Disable reader optimistic spinning for this rwsem after
+ * acquiring the write lock when the setting of the nonspinnable
+ * bits are observed.
+ */
+ disable_rspin = (long)READ_ONCE(sem->owner) & RWSEM_NONSPINNABLE;
+
/*
* Optimistic spinning failed, proceed to the slowpath
* and block until we can acquire the sem.
@@ -1182,6 +1227,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
}
__set_current_state(TASK_RUNNING);
list_del(&waiter.list);
+ rwsem_disable_reader_optspin(sem, disable_rspin);
raw_spin_unlock_irq(&sem->wait_lock);
lockevent_inc(rwsem_wlock);
@@ -1318,7 +1364,8 @@ static inline void __down_write(struct rw_semaphore *sem)
if (unlikely(atomic_long_cmpxchg_acquire(&sem->count, 0,
RWSEM_WRITER_LOCKED)))
rwsem_down_write_failed(sem);
- rwsem_set_owner(sem);
+ else
+ rwsem_set_owner(sem);
#ifdef RWSEM_MERGE_OWNER_TO_COUNT
DEBUG_RWSEMS_WARN_ON(sem->owner != rwsem_get_owner(sem), sem);
#endif
@@ -1327,10 +1374,12 @@ static inline void __down_write(struct rw_semaphore *sem)
static inline int __down_write_killable(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_cmpxchg_acquire(&sem->count, 0,
- RWSEM_WRITER_LOCKED)))
+ RWSEM_WRITER_LOCKED))) {
if (IS_ERR(rwsem_down_write_failed_killable(sem)))
return -EINTR;
- rwsem_set_owner(sem);
+ } else {
+ rwsem_set_owner(sem);
+ }
return 0;
}
--
2.18.1
^ permalink raw reply related [flat|nested] 13+ messages in thread