From: Waiman Long <longman@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>, Will Deacon <will.deacon@arm.com>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Mike Kravetz <mike.kravetz@oracle.com>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-mm@kvack.org, Davidlohr Bueso <dave@stgolabs.net>,
Waiman Long <longman@redhat.com>
Subject: [PATCH 2/5] locking/rwsem: Enable timeout check when spinning on owner
Date: Wed, 11 Sep 2019 16:05:34 +0100 [thread overview]
Message-ID: <20190911150537.19527-3-longman@redhat.com> (raw)
In-Reply-To: <20190911150537.19527-1-longman@redhat.com>
When a task is optimistically spinning on the owner, it may keep spinning for a
long time if there is no other runnable task available in the run queue. That
spinning can extend well past the given timeout value.
To prevent that from happening, rwsem_optimistic_spin() is now modified to
check the timeout value, if one is specified, and abort the spin early when
the timeout has expired.
Signed-off-by: Waiman Long <longman@redhat.com>
---
kernel/locking/rwsem.c | 67 ++++++++++++++++++++++++++++--------------
1 file changed, 45 insertions(+), 22 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index c0285749c338..49f052d68404 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -716,11 +716,13 @@ rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long
}
static noinline enum owner_state
-rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable,
+ ktime_t timeout)
{
struct task_struct *new, *owner;
unsigned long flags, new_flags;
enum owner_state state;
+ int loopcnt = 0;
owner = rwsem_owner_flags(sem, &flags);
state = rwsem_owner_state(owner, flags, nonspinnable);
@@ -749,16 +751,22 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
*/
barrier();
- if (need_resched() || !owner_on_cpu(owner)) {
- state = OWNER_NONSPINNABLE;
- break;
- }
+ if (need_resched() || !owner_on_cpu(owner))
+ goto stop_optspin;
+
+ if (timeout && !(++loopcnt & 0xf) &&
+ (sched_clock() >= ktime_to_ns(timeout)))
+ goto stop_optspin;
cpu_relax();
}
rcu_read_unlock();
return state;
+
+stop_optspin:
+ rcu_read_unlock();
+ return OWNER_NONSPINNABLE;
}
/*
@@ -786,12 +794,13 @@ static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
return sched_clock() + delta;
}
-static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock,
+ ktime_t timeout)
{
bool taken = false;
int prev_owner_state = OWNER_NULL;
int loop = 0;
- u64 rspin_threshold = 0;
+ u64 rspin_threshold = 0, curtime;
unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE
: RWSEM_RD_NONSPINNABLE;
@@ -801,6 +810,8 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
if (!osq_lock(&sem->osq))
goto done;
+ curtime = timeout ? sched_clock() : 0;
+
/*
* Optimistically spin on the owner field and attempt to acquire the
* lock whenever the owner changes. Spinning will be stopped when:
@@ -810,7 +821,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
for (;;) {
enum owner_state owner_state;
- owner_state = rwsem_spin_on_owner(sem, nonspinnable);
+ owner_state = rwsem_spin_on_owner(sem, nonspinnable, timeout);
if (!(owner_state & OWNER_SPINNABLE))
break;
@@ -823,6 +834,21 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
if (taken)
break;
+ /*
+ * Check current time once every 16 iterations when
+ * 1) spinning on reader-owned rwsem; or
+ * 2) a timeout value is specified.
+ *
+ * This is to avoid calling sched_clock() too frequently
+ * so as to reduce the average latency between the times
+ * when the lock becomes free and when the spinner is
+ * ready to do a trylock.
+ */
+ if ((wlock && (owner_state == OWNER_READER)) || timeout) {
+ if (!(++loop & 0xf))
+ curtime = sched_clock();
+ }
+
/*
* Time-based reader-owned rwsem optimistic spinning
*/
@@ -838,23 +864,18 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
if (rwsem_test_oflags(sem, nonspinnable))
break;
rspin_threshold = rwsem_rspin_threshold(sem);
- loop = 0;
}
- /*
- * Check time threshold once every 16 iterations to
- * avoid calling sched_clock() too frequently so
- * as to reduce the average latency between the times
- * when the lock becomes free and when the spinner
- * is ready to do a trylock.
- */
- else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
+ else if (curtime > rspin_threshold) {
rwsem_set_nonspinnable(sem);
lockevent_inc(rwsem_opt_nospin);
break;
}
}
+ if (timeout && (ns_to_ktime(curtime) >= timeout))
+ break;
+
/*
* An RT task cannot do optimistic spinning if it cannot
* be sure the lock holder is running or live-lock may
@@ -968,7 +989,8 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
return false;
}
-static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
+static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock,
+ ktime_t timeout)
{
return false;
}
@@ -982,7 +1004,8 @@ static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
}
static inline int
-rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable,
+ ktime_t timeout)
{
return 0;
}
@@ -1036,7 +1059,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
*/
atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
adjustment = 0;
- if (rwsem_optimistic_spin(sem, false)) {
+ if (rwsem_optimistic_spin(sem, false, 0)) {
/* rwsem_optimistic_spin() implies ACQUIRE on success */
/*
* Wake up other readers in the wait list if the front
@@ -1175,7 +1198,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state, ktime_t timeout)
/* do optimistic spinning and steal lock if possible */
if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
- rwsem_optimistic_spin(sem, true)) {
+ rwsem_optimistic_spin(sem, true, timeout)) {
/* rwsem_optimistic_spin() implies ACQUIRE on success */
return sem;
}
@@ -1255,7 +1278,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state, ktime_t timeout)
* without sleeping.
*/
if ((wstate == WRITER_HANDOFF) &&
- (rwsem_spin_on_owner(sem, 0) == OWNER_NULL))
+ (rwsem_spin_on_owner(sem, 0, 0) == OWNER_NULL))
goto trylock_again;
/* Block until there are no active lockers. */
--
2.18.1
next prev parent reply other threads:[~2019-09-11 15:06 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-11 15:05 [PATCH 0/5] hugetlbfs: Disable PMD sharing for large systems Waiman Long
2019-09-11 15:05 ` [PATCH 1/5] locking/rwsem: Add down_write_timedlock() Waiman Long
2019-09-11 15:05 ` Waiman Long [this message]
2019-09-11 15:05 ` [PATCH 3/5] locking/osq: Allow early break from OSQ Waiman Long
2019-09-11 15:05 ` [PATCH 4/5] locking/rwsem: Enable timeout check when staying in the OSQ Waiman Long
2019-09-11 15:05 ` [PATCH 5/5] hugetlbfs: Limit wait time when trying to share huge PMD Waiman Long
2019-09-11 15:14 ` Matthew Wilcox
2019-09-11 15:44 ` Waiman Long
2019-09-11 17:03 ` Mike Kravetz
2019-09-11 17:15 ` Waiman Long
2019-09-11 17:22 ` Qian Cai
2019-09-11 17:28 ` Waiman Long
2019-09-11 16:01 ` Qian Cai
2019-09-11 16:34 ` Waiman Long
2019-09-11 19:42 ` Qian Cai
2019-09-11 20:54 ` Waiman Long
2019-09-11 21:57 ` Qian Cai
2019-09-11 19:57 ` Matthew Wilcox
2019-09-11 20:51 ` Waiman Long
2019-09-12 3:26 ` Mike Kravetz
2019-09-12 3:41 ` Matthew Wilcox
2019-09-12 4:40 ` Davidlohr Bueso
2019-09-16 13:53 ` Waiman Long
2019-09-12 9:06 ` Waiman Long
2019-09-12 16:43 ` Mike Kravetz
2019-09-13 18:23 ` Waiman Long
2019-09-13 1:50 ` [PATCH 0/5] hugetlbfs: Disable PMD sharing for large systems Dave Chinner
2019-09-25 8:35 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190911150537.19527-3-longman@redhat.com \
--to=longman@redhat.com \
--cc=dave@stgolabs.net \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=viro@zeniv.linux.org.uk \
--cc=will.deacon@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).