linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: oleg@redhat.com, paulmck@linux.vnet.ibm.com, tj@kernel.org,
	mingo@redhat.com, linux-kernel@vger.kernel.org, der.herr@hofr.at,
	peterz@infradead.org, dave@stgolabs.net, riel@redhat.com,
	viro@ZenIV.linux.org.uk, torvalds@linux-foundation.org,
	wagi@monom.org
Subject: [PATCH 2/7] fs/locks: Replace lg_global with a percpu-rwsem
Date: Mon, 05 Sep 2016 21:40:56 +0200	[thread overview]
Message-ID: <20160905194127.135874160@infradead.org> (raw)
In-Reply-To: 20160905194054.369038779@infradead.org

[-- Attachment #1: peter_zijlstra-fs_locks-replace_lg_global_with_a_percpu-rwsem.patch --]
[-- Type: text/plain, Size: 5292 bytes --]

Replace the global part of the lglock with a percpu-rwsem.

Since flc_lock is a spinlock and itself nests under i_lock, which is
also a spinlock, we cannot acquire sleeping locks in
locks_{insert,delete}_global_locks().

We can, however, wrap all flc_lock acquisitions with percpu_down_read()
such that all invocations of locks_{insert,delete}_global_locks() have
that read lock held.

This allows us to replace the lg_global part of the lglock with the
write side of the rwsem.

In the absence of writers, percpu_{down,up}_read() are free of atomic
instructions. This further avoids the very long preempt-disable
regions caused by lglock on larger machines.

Cc: der.herr@hofr.at
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: mingo@redhat.com
Cc: torvalds@linux-foundation.org
Cc: dave@stgolabs.net
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: riel@redhat.com
Cc: tj@kernel.org
Cc: paulmck@linux.vnet.ibm.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 fs/locks.c |   21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

--- a/fs/locks.c
+++ b/fs/locks.c
@@ -164,6 +164,7 @@ int lease_break_time = 45;
  */
 DEFINE_STATIC_LGLOCK(file_lock_lglock);
 static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
+DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
 
 /*
  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
@@ -587,6 +588,8 @@ static int posix_same_owner(struct file_
 /* Must be called with the flc_lock held! */
 static void locks_insert_global_locks(struct file_lock *fl)
 {
+	percpu_rwsem_assert_held(&file_rwsem);
+
 	lg_local_lock(&file_lock_lglock);
 	fl->fl_link_cpu = smp_processor_id();
 	hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
@@ -596,6 +599,8 @@ static void locks_insert_global_locks(st
 /* Must be called with the flc_lock held! */
 static void locks_delete_global_locks(struct file_lock *fl)
 {
+	percpu_rwsem_assert_held(&file_rwsem);
+
 	/*
 	 * Avoid taking lock if already unhashed. This is safe since this check
 	 * is done while holding the flc_lock, and new insertions into the list
@@ -915,6 +920,7 @@ static int flock_lock_inode(struct inode
 			return -ENOMEM;
 	}
 
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	if (request->fl_flags & FL_ACCESS)
 		goto find_conflict;
@@ -955,6 +961,7 @@ static int flock_lock_inode(struct inode
 
 out:
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read(&file_rwsem);
 	if (new_fl)
 		locks_free_lock(new_fl);
 	locks_dispose_list(&dispose);
@@ -991,6 +998,7 @@ static int posix_lock_inode(struct inode
 		new_fl2 = locks_alloc_lock();
 	}
 
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	/*
 	 * New lock request. Walk all POSIX locks and look for conflicts. If
@@ -1162,6 +1170,7 @@ static int posix_lock_inode(struct inode
 	}
  out:
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read(&file_rwsem);
 	/*
 	 * Free any unused locks.
 	 */
@@ -1436,6 +1445,7 @@ int __break_lease(struct inode *inode, u
 		return error;
 	}
 
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 
 	time_out_leases(inode, &dispose);
@@ -1487,9 +1497,13 @@ int __break_lease(struct inode *inode, u
 	locks_insert_block(fl, new_fl);
 	trace_break_lease_block(inode, new_fl);
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read(&file_rwsem);
+
 	locks_dispose_list(&dispose);
 	error = wait_event_interruptible_timeout(new_fl->fl_wait,
 						!new_fl->fl_next, break_time);
+
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	trace_break_lease_unblock(inode, new_fl);
 	locks_delete_block(new_fl);
@@ -1506,6 +1520,7 @@ int __break_lease(struct inode *inode, u
 	}
 out:
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read(&file_rwsem);
 	locks_dispose_list(&dispose);
 	locks_free_lock(new_fl);
 	return error;
@@ -1660,6 +1675,7 @@ generic_add_lease(struct file *filp, lon
 		return -EINVAL;
 	}
 
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	time_out_leases(inode, &dispose);
 	error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@ -1730,6 +1746,7 @@ generic_add_lease(struct file *filp, lon
 		lease->fl_lmops->lm_setup(lease, priv);
 out:
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read(&file_rwsem);
 	locks_dispose_list(&dispose);
 	if (is_deleg)
 		inode_unlock(inode);
@@ -1752,6 +1769,7 @@ static int generic_delete_lease(struct f
 		return error;
 	}
 
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
 		if (fl->fl_file == filp &&
@@ -1764,6 +1782,7 @@ static int generic_delete_lease(struct f
 	if (victim)
 		error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read(&file_rwsem);
 	locks_dispose_list(&dispose);
 	return error;
 }
@@ -2703,6 +2722,7 @@ static void *locks_start(struct seq_file
 	struct locks_iterator *iter = f->private;
 
 	iter->li_pos = *pos + 1;
+	percpu_down_write(&file_rwsem);
 	lg_global_lock(&file_lock_lglock);
 	spin_lock(&blocked_lock_lock);
 	return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
@@ -2721,6 +2741,7 @@ static void locks_stop(struct seq_file *
 {
 	spin_unlock(&blocked_lock_lock);
 	lg_global_unlock(&file_lock_lglock);
+	percpu_up_write(&file_rwsem);
 }
 
 static const struct seq_operations locks_seq_operations = {

  parent reply	other threads:[~2016-09-05 19:42 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-05 19:40 [PATCH 0/7] perpcu rwsem, fs/locks and killing lglocks Peter Zijlstra
2016-09-05 19:40 ` [PATCH 1/7] percpu-rwsem: DEFINE_STATIC_PERCPU_RWSEM Peter Zijlstra
2016-09-05 19:40 ` Peter Zijlstra [this message]
2016-09-05 19:40 ` [PATCH 3/7] fs/locks: Replace lg_local with a per-cpu spinlock Peter Zijlstra
2016-09-05 19:40 ` [PATCH 4/7] percpu-rwsem: Add down_read_preempt_disable() Peter Zijlstra
2016-09-05 19:40 ` [PATCH 5/7] fs/locks: Use percpu_down_read_preempt_disable Peter Zijlstra
2016-09-05 19:41 ` [PATCH 6/7] stop_machine: Remove stop_cpus_lock and lg_double_lock/unlock() Peter Zijlstra
2016-09-05 19:41 ` [PATCH 7/7] locking: Remove lglock Peter Zijlstra
2016-09-06  4:45 ` [PATCH 0/7] perpcu rwsem, fs/locks and killing lglocks Mike Galbraith
2016-09-06  1:58 [PATCH 2/7] fs/locks: Replace lg_global with a percpu-rwsem Andreas Mohr
2016-09-06  8:23 ` Peter Zijlstra
2016-09-06  8:36   ` Andreas Mohr
2016-09-06  8:59     ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160905194127.135874160@infradead.org \
    --to=peterz@infradead.org \
    --cc=dave@stgolabs.net \
    --cc=der.herr@hofr.at \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@ZenIV.linux.org.uk \
    --cc=wagi@monom.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).