From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756287AbbFVM1O (ORCPT ); Mon, 22 Jun 2015 08:27:14 -0400 Received: from bombadil.infradead.org ([198.137.202.9]:37087 "EHLO bombadil.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933453AbbFVMYk (ORCPT ); Mon, 22 Jun 2015 08:24:40 -0400 Message-Id: <20150622122256.585322580@infradead.org> User-Agent: quilt/0.61-1 Date: Mon, 22 Jun 2015 14:16:33 +0200 From: Peter Zijlstra To: oleg@redhat.com, paulmck@linux.vnet.ibm.com Cc: tj@kernel.org, mingo@redhat.com, linux-kernel@vger.kernel.org, der.herr@hofr.at, peterz@infradead.org, dave@stgolabs.net, riel@redhat.com, viro@ZenIV.linux.org.uk, torvalds@linux-foundation.org Subject: [RFC][PATCH 10/13] fs/locks: Replace lg_global with a percpu-rwsem References: <20150622121623.291363374@infradead.org> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Disposition: inline; filename=peterz-fslocks-rwsem.patch Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Replace the global part of the lglock with a percpu-rwsem. Since flc_lock is a spinlock and itself nests under i_lock, which too is a spinlock, we cannot acquire sleeping locks at locks_{insert,delete}_global_locks(). We can, however, wrap all flc_lock acquisitions with percpu_down_read such that all invocations of locks_{insert,delete}_global_locks() have that read lock held. This allows us to replace the lg_global part of the lglock with the write side of the rwsem. In the absence of writers, percpu_{down,up}_read() are free of atomic instructions. This further avoids the very long preempt-disable regions caused by lglock on larger machines. 
Cc: Al Viro Cc: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) --- fs/locks.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) --- a/fs/locks.c +++ b/fs/locks.c @@ -165,6 +165,7 @@ int lease_break_time = 45; */ DEFINE_STATIC_LGLOCK(file_lock_lglock); static DEFINE_PER_CPU(struct hlist_head, file_lock_list); +static struct percpu_rw_semaphore file_rwsem; /* * The blocked_hash is used to find POSIX lock loops for deadlock detection. @@ -556,6 +557,8 @@ static int posix_same_owner(struct file_ /* Must be called with the flc_lock held! */ static void locks_insert_global_locks(struct file_lock *fl) { + lockdep_assert_held_percpu_rwsem(&file_rwsem); + lg_local_lock(&file_lock_lglock); fl->fl_link_cpu = smp_processor_id(); hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); @@ -565,6 +568,8 @@ static void locks_insert_global_locks(st /* Must be called with the flc_lock held! */ static void locks_delete_global_locks(struct file_lock *fl) { + lockdep_assert_held_percpu_rwsem(&file_rwsem); + /* * Avoid taking lock if already unhashed. This is safe since this check * is done while holding the flc_lock, and new insertions into the list @@ -885,6 +890,7 @@ static int flock_lock_file(struct file * return -ENOMEM; } + percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); if (request->fl_flags & FL_ACCESS) goto find_conflict; @@ -925,6 +931,7 @@ static int flock_lock_file(struct file * out: spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); @@ -960,6 +967,7 @@ static int __posix_lock_file(struct inod new_fl2 = locks_alloc_lock(); } + percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. If @@ -1131,6 +1139,7 @@ static int __posix_lock_file(struct inod } out: spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); /* * Free any unused locks. 
*/ @@ -1407,6 +1416,7 @@ int __break_lease(struct inode *inode, u return error; } + percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); @@ -1477,6 +1487,7 @@ int __break_lease(struct inode *inode, u } out: spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); locks_free_lock(new_fl); return error; @@ -1630,6 +1641,7 @@ generic_add_lease(struct file *filp, lon return -EINVAL; } + percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(dentry, arg, lease->fl_flags); @@ -1700,6 +1712,7 @@ generic_add_lease(struct file *filp, lon lease->fl_lmops->lm_setup(lease, priv); out: spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); if (is_deleg) mutex_unlock(&inode->i_mutex); @@ -1722,6 +1735,7 @@ static int generic_delete_lease(struct f return error; } + percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (fl->fl_file == filp && @@ -1734,6 +1748,7 @@ static int generic_delete_lease(struct f if (victim) error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); return error; } @@ -2634,6 +2649,7 @@ static void *locks_start(struct seq_file struct locks_iterator *iter = f->private; iter->li_pos = *pos + 1; + percpu_down_write(&file_rwsem); lg_global_lock(&file_lock_lglock); spin_lock(&blocked_lock_lock); return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); @@ -2652,6 +2668,7 @@ static void locks_stop(struct seq_file * { spin_unlock(&blocked_lock_lock); lg_global_unlock(&file_lock_lglock); + percpu_up_write(&file_rwsem); } static const struct seq_operations locks_seq_operations = { @@ -2693,6 +2710,7 @@ static int __init filelock_init(void) sizeof(struct file_lock), 0, SLAB_PANIC, NULL); lg_lock_init(&file_lock_lglock, 
"file_lock_lglock"); + percpu_init_rwsem(&file_rwsem); for_each_possible_cpu(i) INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i)); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in Please read the FAQ at http://www.tux.org/lkml/