LKML Archive on lore.kernel.org
 help / Atom feed
From: NeilBrown <neilb@suse.com>
To: Jeff Layton <jlayton@kernel.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "J. Bruce Fields" <bfields@fieldses.org>,
	Martin Wilck <mwilck@suse.de>,
	linux-fsdevel@vger.kernel.org,
	Frank Filz <ffilzlnx@mindspring.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 5/5] fs/locks: create a tree of dependent requests.
Date: Thu, 09 Aug 2018 12:04:41 +1000
Message-ID: <153378028121.1220.4418653283078446336.stgit@noble> (raw)
In-Reply-To: <153378012255.1220.6754153662007899557.stgit@noble>

When we find an existing lock which conflicts with a request,
and the request wants to wait, we currently add the request
to a list.  When the lock is removed, the whole list is woken.
This can cause the thundering-herd problem.
To reduce the problem, we make use of the (new) fact that
a pending request can itself have a list of blocked requests.
When we find a conflict, we look through the existing blocked requests.
If any one of them blocks the new request, the new request is attached
below that request.
This way, when the lock is released, only a set of non-conflicting
locks will be woken.  The rest of the herd can stay asleep.

Reported-and-tested-by: Martin Wilck <mwilck@suse.de>
Signed-off-by: NeilBrown <neilb@suse.com>
---
 fs/locks.c |   69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 6 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index fc64016d01ee..17843feb6f5b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -738,6 +738,39 @@ static void locks_delete_block(struct file_lock *waiter)
 	spin_unlock(&blocked_lock_lock);
 }
 
+static void wake_non_conflicts(struct file_lock *waiter, struct file_lock *blocker,
+			       enum conflict conflict(struct file_lock *,
+						      struct file_lock *))
+{
+	struct file_lock *parent = waiter;
+	struct file_lock *fl;
+	struct file_lock  *t;
+
+	fl = list_entry(&parent->fl_blocked, struct file_lock, fl_block);
+restart:
+	list_for_each_entry_safe_continue(fl, t, &parent->fl_blocked, fl_block) {
+		switch (conflict(fl, blocker)) {
+		default:
+		case FL_NO_CONFLICT:
+			__locks_wake_one(fl);
+			break;
+		case FL_CONFLICT:
+			/* Need to check children */
+			parent = fl;
+			fl = list_entry(&parent->fl_blocked, struct file_lock, fl_block);
+			goto restart;
+		case FL_TRANSITIVE_CONFLICT:
+			/* all children must also conflict, no need to check */
+			continue;
+		}
+	}
+	if (parent != waiter) {
+		parent = parent->fl_blocker;
+		fl = parent;
+		goto restart;
+	}
+}
+
 /* Insert waiter into blocker's block list.
  * We use a circular list so that processes can be easily woken up in
  * the order they blocked. The documentation doesn't require this but
@@ -747,11 +780,32 @@ static void locks_delete_block(struct file_lock *waiter)
  * fl_blocked list itself is protected by the blocked_lock_lock, but by ensuring
  * that the flc_lock is also held on insertions we can avoid taking the
  * blocked_lock_lock in some cases when we see that the fl_blocked list is empty.
+ *
+ * Rather than just adding to the list, we check for conflicts with any existing
+ * waiter, and add to that waiter instead.
+ * Thus wakeups don't happen until needed.
  */
 static void __locks_insert_block(struct file_lock *blocker,
-					struct file_lock *waiter)
+				 struct file_lock *waiter,
+				 enum conflict conflict(struct file_lock *,
+							struct file_lock *))
 {
+	struct file_lock *fl;
 	BUG_ON(!list_empty(&waiter->fl_block));
+
+	/* Any request in waiter->fl_blocked is know to conflict with
+	 * waiter, but it might not conflict with blocker.
+	 * If it doesn't, it needs to be woken now so it can find
+	 * somewhere else to wait, or possible it can get granted.
+	 */
+	if (conflict(waiter, blocker) != FL_TRANSITIVE_CONFLICT)
+		wake_non_conflicts(waiter, blocker, conflict);
+new_blocker:
+	list_for_each_entry(fl, &blocker->fl_blocked, fl_block)
+		if (conflict(fl, waiter)) {
+			blocker =  fl;
+			goto new_blocker;
+		}
 	waiter->fl_blocker = blocker;
 	list_add_tail(&waiter->fl_block, &blocker->fl_blocked);
 	if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
@@ -760,10 +814,12 @@ static void __locks_insert_block(struct file_lock *blocker,
 
 /* Must be called with flc_lock held. */
 static void locks_insert_block(struct file_lock *blocker,
-					struct file_lock *waiter)
+			       struct file_lock *waiter,
+			       enum conflict conflict(struct file_lock *,
+						      struct file_lock *))
 {
 	spin_lock(&blocked_lock_lock);
-	__locks_insert_block(blocker, waiter);
+	__locks_insert_block(blocker, waiter, conflict);
 	spin_unlock(&blocked_lock_lock);
 }
 
@@ -1033,7 +1089,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
 		if (!(request->fl_flags & FL_SLEEP))
 			goto out;
 		error = FILE_LOCK_DEFERRED;
-		locks_insert_block(fl, request);
+		locks_insert_block(fl, request, flock_locks_conflict);
 		goto out;
 	}
 	if (request->fl_flags & FL_ACCESS)
@@ -1107,7 +1163,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 			spin_lock(&blocked_lock_lock);
 			if (likely(!posix_locks_deadlock(request, fl))) {
 				error = FILE_LOCK_DEFERRED;
-				__locks_insert_block(fl, request);
+				__locks_insert_block(fl, request,
+						     posix_locks_conflict);
 			}
 			spin_unlock(&blocked_lock_lock);
 			goto out;
@@ -1581,7 +1638,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 		break_time -= jiffies;
 	if (break_time == 0)
 		break_time++;
-	locks_insert_block(fl, new_fl);
+	locks_insert_block(fl, new_fl, leases_conflict);
 	trace_break_lease_block(inode, new_fl);
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read_preempt_enable(&file_rwsem);



  parent reply index

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-09  2:04 [PATCH 0/5 - V2] locks: avoid thundering-herd wake-ups NeilBrown
2018-08-09  2:04 ` [PATCH 1/5] fs/locks: rename some lists and pointers NeilBrown
2018-08-09  2:04 ` [PATCH 2/5] fs/locks: allow a lock request to block other requests NeilBrown
2018-08-09  2:04 ` [PATCH 3/5] fs/locks: change all *_conflict() functions to return a new enum NeilBrown
2018-08-09 11:09   ` Jeff Layton
2018-08-09 13:09   ` J. Bruce Fields
2018-08-09 23:40     ` NeilBrown
2018-08-10  0:56       ` J. Bruce Fields
2018-08-09  2:04 ` [PATCH 4/5] fs/locks: split out __locks_wake_one() NeilBrown
2018-08-09  2:04 ` NeilBrown [this message]
2018-08-09 11:17   ` [PATCH 5/5] fs/locks: create a tree of dependent requests Jeff Layton
2018-08-09 23:25     ` NeilBrown
2018-08-09 14:13   ` J. Bruce Fields
2018-08-09 22:19     ` NeilBrown
2018-08-10  0:36       ` J. Bruce Fields
2018-08-09 17:32 ` [PATCH 0/5 - V2] locks: avoid thundering-herd wake-ups J. Bruce Fields
2018-08-09 22:12   ` NeilBrown
2018-08-10  0:29     ` J. Bruce Fields
2018-08-10  1:50       ` NeilBrown
2018-08-10  2:52         ` J. Bruce Fields
2018-08-10  3:17           ` NeilBrown
2018-08-10 15:47             ` J. Bruce Fields
2018-08-11 11:56               ` Jeff Layton
2018-08-11 12:35                 ` J. Bruce Fields
2018-08-11 11:51       ` Jeff Layton
2018-08-11 12:21         ` J. Bruce Fields
2018-08-11 13:15           ` Jeff Layton
2018-08-14  3:56 [PATCH 0/5 v2] " NeilBrown
2018-08-14  3:56 ` [PATCH 5/5] fs/locks: create a tree of dependent requests NeilBrown

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=153378028121.1220.4418653283078446336.stgit@noble \
    --to=neilb@suse.com \
    --cc=bfields@fieldses.org \
    --cc=ffilzlnx@mindspring.com \
    --cc=jlayton@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mwilck@suse.de \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox