From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-he1eur01on0114.outbound.protection.outlook.com ([104.47.0.114]:63680 "EHLO EUR01-HE1-obe.outbound.protection.outlook.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1752465AbeBSJ6m (ORCPT ); Mon, 19 Feb 2018 04:58:42 -0500 Subject: [PATCH 1/3] net: Kill net_mutex From: Kirill Tkhai To: davem@davemloft.net, nicolas.dichtel@6wind.com, vyasevic@redhat.com, ktkhai@virtuozzo.com, paulmck@linux.vnet.ibm.com, vyasevich@gmail.com, mark.rutland@arm.com, gregkh@linuxfoundation.org, leonro@mellanox.com, avagin@virtuozzo.com, fw@strlen.de, roman.kapl@sysgo.com, netdev@vger.kernel.org Date: Mon, 19 Feb 2018 12:58:38 +0300 Message-ID: <151903431795.8021.5069678488269142143.stgit@localhost.localdomain> In-Reply-To: <151903409491.8021.11032992295248447417.stgit@localhost.localdomain> References: <151903409491.8021.11032992295248447417.stgit@localhost.localdomain> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: netdev-owner@vger.kernel.org List-ID: We take net_mutex, when there are !async pernet_operations registered, and read locking of net_sem is not enough. But we may get rid of taking the mutex, and just change the logic to write lock net_sem in such cases. This obviously reduces the number of lock operations, we do. Signed-off-by: Kirill Tkhai --- include/linux/rtnetlink.h | 1 - include/net/net_namespace.h | 11 ++++++--- net/core/net_namespace.c | 53 ++++++++++++++++++++++++++----------------- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index e9ee9ad0a681..3573b4bf2fdf 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -35,7 +35,6 @@ extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern wait_queue_head_t netdev_unregistering_wq; -extern struct mutex net_mutex; extern struct rw_semaphore net_sem; #ifdef CONFIG_PROVE_LOCKING diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 9158ec1ad06f..115b01b92f4d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,8 +60,11 @@ struct net { struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ - struct list_head exit_list; /* Use only net_mutex */ - + struct list_head exit_list; /* To linked to call pernet exit + * methods on dead net (net_sem + * read locked), or to unregister + * pernet ops (net_sem wr locked). + */ struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; spinlock_t nsid_lock; @@ -89,7 +92,7 @@ struct net { /* core fib_rules */ struct list_head rules_ops; - struct list_head fib_notifier_ops; /* protected by net_mutex */ + struct list_head fib_notifier_ops; /* protected by net_sem */ struct net_device *loopback_dev; /* The loopback */ struct netns_core core; @@ -316,7 +319,7 @@ struct pernet_operations { /* * Indicates above methods are allowed to be executed in parallel * with methods of any other pernet_operations, i.e. they are not - * need synchronization via net_mutex. + * need write locked net_sem. */ bool async; }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index bcab9a938d6f..e89a516620dd 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -29,8 +29,6 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -/* Used only if there are !async pernet_operations registered */ -DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); @@ -407,6 +405,7 @@ struct net *copy_net_ns(unsigned long flags, { struct ucounts *ucounts; struct net *net; + unsigned write; int rv; if (!(flags & CLONE_NEWNET)) @@ -424,20 +423,26 @@ struct net *copy_net_ns(unsigned long flags, refcount_set(&net->passive, 1); net->ucounts = ucounts; get_user_ns(user_ns); - - rv = down_read_killable(&net_sem); +again: + write = READ_ONCE(nr_sync_pernet_ops); + if (write) + rv = down_write_killable(&net_sem); + else + rv = down_read_killable(&net_sem); if (rv < 0) goto put_userns; - if (nr_sync_pernet_ops) { - rv = mutex_lock_killable(&net_mutex); - if (rv < 0) - goto up_read; + + if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) { + up_read(&net_sem); + goto again; } rv = setup_net(net, user_ns); - if (nr_sync_pernet_ops) - mutex_unlock(&net_mutex); -up_read: - up_read(&net_sem); + + if (write) + up_write(&net_sem); + else + up_read(&net_sem); + if (rv < 0) { put_userns: put_user_ns(user_ns); @@ -485,15 +490,23 @@ static void cleanup_net(struct work_struct *work) struct net *net, *tmp, *last; struct list_head net_kill_list; LIST_HEAD(net_exit_list); + unsigned write; /* Atomically snapshot the list of namespaces to cleanup */ spin_lock_irq(&cleanup_list_lock); list_replace_init(&cleanup_list, &net_kill_list); spin_unlock_irq(&cleanup_list_lock); +again: + write = READ_ONCE(nr_sync_pernet_ops); + if (write) + down_write(&net_sem); + else + down_read(&net_sem); - down_read(&net_sem); - if (nr_sync_pernet_ops) - mutex_lock(&net_mutex); + if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) { + up_read(&net_sem); + goto again; + } /* Don't let anyone else find us. */ rtnl_lock(); @@ -528,14 +541,14 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); - if (nr_sync_pernet_ops) - mutex_unlock(&net_mutex); - /* Free the net generic variables */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); - up_read(&net_sem); + if (write) + up_write(&net_sem); + else + up_read(&net_sem); /* Ensure there are no outstanding rcu callbacks using this * network namespace. @@ -563,8 +576,6 @@ static void cleanup_net(struct work_struct *work) void net_ns_barrier(void) { down_write(&net_sem); - mutex_lock(&net_mutex); - mutex_unlock(&net_mutex); up_write(&net_sem); } EXPORT_SYMBOL(net_ns_barrier);