Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: Waiman Long <longman@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will.deacon@arm.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Mike Kravetz <mike.kravetz@oracle.com>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, Davidlohr Bueso <dave@stgolabs.net>,
	Waiman Long <longman@redhat.com>
Subject: [PATCH 2/5] locking/rwsem: Enable timeout check when spinning on owner
Date: Wed, 11 Sep 2019 16:05:34 +0100
Message-ID: <20190911150537.19527-3-longman@redhat.com> (raw)
In-Reply-To: <20190911150537.19527-1-longman@redhat.com>

When a task is optimistically spinning on the owner, it may do it for a
long time if there is no other running task available in the run queue.
That can be long past the given timeout value.

To prevent that from happening, the rwsem_optimistic_spin() is now
modified to check for the timeout value, if specified, to see if it
should abort early.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 kernel/locking/rwsem.c | 67 ++++++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 22 deletions(-)

diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index c0285749c338..49f052d68404 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -716,11 +716,13 @@ rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long
 }
 
 static noinline enum owner_state
-rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable,
+		    ktime_t timeout)
 {
 	struct task_struct *new, *owner;
 	unsigned long flags, new_flags;
 	enum owner_state state;
+	int loopcnt = 0;
 
 	owner = rwsem_owner_flags(sem, &flags);
 	state = rwsem_owner_state(owner, flags, nonspinnable);
@@ -749,16 +751,22 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
 		 */
 		barrier();
 
-		if (need_resched() || !owner_on_cpu(owner)) {
-			state = OWNER_NONSPINNABLE;
-			break;
-		}
+		if (need_resched() || !owner_on_cpu(owner))
+			goto stop_optspin;
+
+		if (timeout && !(++loopcnt & 0xf) &&
+		   (sched_clock() >= ktime_to_ns(timeout)))
+			goto stop_optspin;
 
 		cpu_relax();
 	}
 	rcu_read_unlock();
 
 	return state;
+
+stop_optspin:
+	rcu_read_unlock();
+	return OWNER_NONSPINNABLE;
 }
 
 /*
@@ -786,12 +794,13 @@ static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
 	return sched_clock() + delta;
 }
 
-static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock,
+				  ktime_t timeout)
 {
 	bool taken = false;
 	int prev_owner_state = OWNER_NULL;
 	int loop = 0;
-	u64 rspin_threshold = 0;
+	u64 rspin_threshold = 0, curtime;
 	unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE
 					   : RWSEM_RD_NONSPINNABLE;
 
@@ -801,6 +810,8 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
 	if (!osq_lock(&sem->osq))
 		goto done;
 
+	curtime = timeout ? sched_clock() : 0;
+
 	/*
 	 * Optimistically spin on the owner field and attempt to acquire the
 	 * lock whenever the owner changes. Spinning will be stopped when:
@@ -810,7 +821,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
 	for (;;) {
 		enum owner_state owner_state;
 
-		owner_state = rwsem_spin_on_owner(sem, nonspinnable);
+		owner_state = rwsem_spin_on_owner(sem, nonspinnable, timeout);
 		if (!(owner_state & OWNER_SPINNABLE))
 			break;
 
@@ -823,6 +834,21 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
 		if (taken)
 			break;
 
+		/*
+		 * Check current time once every 16 iterations when
+		 *  1) spinning on reader-owned rwsem; or
+		 *  2) a timeout value is specified.
+		 *
+		 * This is to avoid calling sched_clock() too frequently
+		 * so as to reduce the average latency between the times
+		 * when the lock becomes free and when the spinner is
+		 * ready to do a trylock.
+		 */
+		if ((wlock && (owner_state == OWNER_READER)) || timeout) {
+			if (!(++loop & 0xf))
+				curtime = sched_clock();
+		}
+
 		/*
 		 * Time-based reader-owned rwsem optimistic spinning
 		 */
@@ -838,23 +864,18 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
 				if (rwsem_test_oflags(sem, nonspinnable))
 					break;
 				rspin_threshold = rwsem_rspin_threshold(sem);
-				loop = 0;
 			}
 
-			/*
-			 * Check time threshold once every 16 iterations to
-			 * avoid calling sched_clock() too frequently so
-			 * as to reduce the average latency between the times
-			 * when the lock becomes free and when the spinner
-			 * is ready to do a trylock.
-			 */
-			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
+			else if (curtime > rspin_threshold) {
 				rwsem_set_nonspinnable(sem);
 				lockevent_inc(rwsem_opt_nospin);
 				break;
 			}
 		}
 
+		if (timeout && (ns_to_ktime(curtime) >= timeout))
+			break;
+
 		/*
 		 * An RT task cannot do optimistic spinning if it cannot
 		 * be sure the lock holder is running or live-lock may
@@ -968,7 +989,8 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
 	return false;
 }
 
-static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
+static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock,
+					 ktime_t timeout)
 {
 	return false;
 }
@@ -982,7 +1004,8 @@ static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
 }
 
 static inline int
-rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable,
+		    ktime_t timeout)
 {
 	return 0;
 }
@@ -1036,7 +1059,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
 	 */
 	atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
 	adjustment = 0;
-	if (rwsem_optimistic_spin(sem, false)) {
+	if (rwsem_optimistic_spin(sem, false, 0)) {
 		/* rwsem_optimistic_spin() implies ACQUIRE on success */
 		/*
 		 * Wake up other readers in the wait list if the front
@@ -1175,7 +1198,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state, ktime_t timeout)
 
 	/* do optimistic spinning and steal lock if possible */
 	if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
-	    rwsem_optimistic_spin(sem, true)) {
+	    rwsem_optimistic_spin(sem, true, timeout)) {
 		/* rwsem_optimistic_spin() implies ACQUIRE on success */
 		return sem;
 	}
@@ -1255,7 +1278,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state, ktime_t timeout)
 		 * without sleeping.
 		 */
 		if ((wstate == WRITER_HANDOFF) &&
-		    (rwsem_spin_on_owner(sem, 0) == OWNER_NULL))
+		    (rwsem_spin_on_owner(sem, 0, 0) == OWNER_NULL))
 			goto trylock_again;
 
 		/* Block until there are no active lockers. */
-- 
2.18.1


  parent reply index

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-11 15:05 [PATCH 0/5] hugetlbfs: Disable PMD sharing for large systems Waiman Long
2019-09-11 15:05 ` [PATCH 1/5] locking/rwsem: Add down_write_timedlock() Waiman Long
2019-09-11 15:05 ` Waiman Long [this message]
2019-09-11 15:05 ` [PATCH 3/5] locking/osq: Allow early break from OSQ Waiman Long
2019-09-11 15:05 ` [PATCH 4/5] locking/rwsem: Enable timeout check when staying in the OSQ Waiman Long
2019-09-11 15:05 ` [PATCH 5/5] hugetlbfs: Limit wait time when trying to share huge PMD Waiman Long
2019-09-11 15:14   ` Matthew Wilcox
2019-09-11 15:44     ` Waiman Long
2019-09-11 17:03       ` Mike Kravetz
2019-09-11 17:15         ` Waiman Long
2019-09-11 17:22           ` Qian Cai
2019-09-11 17:28           ` Waiman Long
2019-09-11 16:01   ` Qian Cai
2019-09-11 16:34     ` Waiman Long
2019-09-11 19:42       ` Qian Cai
2019-09-11 20:54         ` Waiman Long
2019-09-11 21:57           ` Qian Cai
2019-09-11 19:57   ` Matthew Wilcox
2019-09-11 20:51     ` Waiman Long
2019-09-12  3:26   ` Mike Kravetz
2019-09-12  3:41     ` Matthew Wilcox
2019-09-12  4:40       ` Davidlohr Bueso
2019-09-16 13:53         ` Waiman Long
2019-09-12  9:06     ` Waiman Long
2019-09-12 16:43       ` Mike Kravetz
2019-09-13 18:23         ` Waiman Long
2019-09-13  1:50 ` [PATCH 0/5] hugetlbfs: Disable PMD sharing for large systems Dave Chinner
2019-09-25  8:35   ` Peter Zijlstra

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190911150537.19527-3-longman@redhat.com \
    --to=longman@redhat.com \
    --cc=dave@stgolabs.net \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git