From: Leon Romanovsky <leon@kernel.org>
To: "David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>
Cc: Leon Romanovsky <leonro@nvidia.com>, Jiri Pirko <jiri@nvidia.com>,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	idosch@idosch.org, edwin.peer@broadcom.com
Subject: [PATCH net-next] devlink: Require devlink lock during device reload
Date: Sun, 31 Oct 2021 19:35:56 +0200
Message-ID: <9716f9a13e217a0a163b745b6e92e02d40973d2c.1635701665.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Devlink reload was implemented as a special command that performs a
_SET_ operation but does not take devlink->lock, while the recursive
devlink calls made as part of the .reload_up()/.reload_down() sequence
did take it.

This fragile flow was only possible because the big devlink lock
(devlink_mutex) was held, which effectively stopped all devlink
activity, even activity unrelated to the devlink instance being
reloaded.

So let's make devlink reload behave like the other commands and use
the nested locking annotation (mutex_lock_nested() with a depth of 1).
This change opens the door to removing devlink_mutex completely, while
keeping the devlink locking complexity confined to devlink.c.
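
For context, mutex_lock_nested() behaves exactly like mutex_lock() at
run time; the subclass argument only informs lockdep, which tracks
lock classes rather than lock instances. A minimal hypothetical sketch
of that pattern (the struct and function names are illustrative, not
taken from devlink):

	#include <linux/mutex.h>

	/* Two objects whose mutexes share one lockdep class. */
	struct node {
		struct mutex lock;	/* same init site => same class */
		struct node *child;
	};

	static void lock_parent_and_child(struct node *parent)
	{
		mutex_lock(&parent->lock);		/* subclass 0 */
		/*
		 * The child's mutex is a different instance of the same
		 * class; annotating it as subclass 1 (SINGLE_DEPTH_NESTING)
		 * keeps lockdep from reporting a false recursive deadlock.
		 */
		mutex_lock_nested(&parent->child->lock, SINGLE_DEPTH_NESTING);

		/* ... work on both objects ... */

		mutex_unlock(&parent->child->lock);
		mutex_unlock(&parent->lock);
	}

The patch below applies the same annotation to devlink->lock so that
the devlink helpers invoked from within the reload sequence are treated
by lockdep as a depth-1 nesting of the same lock class.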

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 net/core/devlink.c | 58 ++++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 2d8abe88c673..e46f8ad43c20 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8734,7 +8734,6 @@ static const struct genl_small_ops devlink_nl_ops[] = {
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = devlink_nl_cmd_reload,
 		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
 	},
 	{
 		.cmd = DEVLINK_CMD_PARAM_GET,
@@ -9219,7 +9218,7 @@ int devlink_port_register(struct devlink *devlink,
 			  struct devlink_port *devlink_port,
 			  unsigned int port_index)
 {
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	if (devlink_port_index_exists(devlink, port_index)) {
 		mutex_unlock(&devlink->lock);
 		return -EEXIST;
@@ -9253,7 +9252,7 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
 
 	devlink_port_type_warn_cancel(devlink_port);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	list_del(&devlink_port->list);
 	mutex_unlock(&devlink->lock);
 	WARN_ON(!list_empty(&devlink_port->reporter_list));
@@ -9501,7 +9500,7 @@ devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
 	if (!devlink_rate)
 		return -ENOMEM;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	WARN_ON(devlink_port->devlink_rate);
 	devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
 	devlink_rate->devlink = devlink;
@@ -9531,7 +9530,7 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
 	if (!devlink_rate)
 		return;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
 	if (devlink_rate->parent)
 		refcount_dec(&devlink_rate->parent->refcnt);
@@ -9557,7 +9556,7 @@ void devlink_rate_nodes_destroy(struct devlink *devlink)
 	static struct devlink_rate *devlink_rate, *tmp;
 	const struct devlink_ops *ops = devlink->ops;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
 		if (!devlink_rate->parent)
 			continue;
@@ -9656,7 +9655,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 	struct devlink_sb *devlink_sb;
 	int err = 0;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	if (devlink_sb_index_exists(devlink, sb_index)) {
 		err = -EEXIST;
 		goto unlock;
@@ -9684,7 +9683,7 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
 {
 	struct devlink_sb *devlink_sb;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
 	WARN_ON(!devlink_sb);
 	list_del(&devlink_sb->list);
@@ -9704,7 +9703,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
 int devlink_dpipe_headers_register(struct devlink *devlink,
 				   struct devlink_dpipe_headers *dpipe_headers)
 {
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	devlink->dpipe_headers = dpipe_headers;
 	mutex_unlock(&devlink->lock);
 	return 0;
@@ -9720,7 +9719,7 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
  */
 void devlink_dpipe_headers_unregister(struct devlink *devlink)
 {
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	devlink->dpipe_headers = NULL;
 	mutex_unlock(&devlink->lock);
 }
@@ -9814,7 +9813,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
 {
 	struct devlink_dpipe_table *table;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
 					 table_name, devlink);
 	if (!table)
@@ -9856,7 +9855,7 @@ int devlink_resource_register(struct devlink *devlink,
 
 	top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	resource = devlink_resource_find(devlink, NULL, resource_id);
 	if (resource) {
 		err = -EINVAL;
@@ -9919,7 +9918,7 @@ void devlink_resources_unregister(struct devlink *devlink,
 		resource_list = &devlink->resource_list;
 
 	if (!resource)
-		mutex_lock(&devlink->lock);
+		mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 
 	list_for_each_entry_safe(child_resource, tmp, resource_list, list) {
 		devlink_resources_unregister(devlink, child_resource);
@@ -9946,7 +9945,7 @@ int devlink_resource_size_get(struct devlink *devlink,
 	struct devlink_resource *resource;
 	int err = 0;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	resource = devlink_resource_find(devlink, NULL, resource_id);
 	if (!resource) {
 		err = -EINVAL;
@@ -9975,7 +9974,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink,
 	struct devlink_dpipe_table *table;
 	int err = 0;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
 					 table_name, devlink);
 	if (!table) {
@@ -10006,7 +10005,7 @@ void devlink_resource_occ_get_register(struct devlink *devlink,
 {
 	struct devlink_resource *resource;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	resource = devlink_resource_find(devlink, NULL, resource_id);
 	if (WARN_ON(!resource))
 		goto out;
@@ -10030,7 +10029,7 @@ void devlink_resource_occ_get_unregister(struct devlink *devlink,
 {
 	struct devlink_resource *resource;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	resource = devlink_resource_find(devlink, NULL, resource_id);
 	if (WARN_ON(!resource))
 		goto out;
@@ -10278,7 +10277,7 @@ devlink_region_create(struct devlink *devlink,
 	if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
 		return ERR_PTR(-EINVAL);
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 
 	if (devlink_region_get_by_name(devlink, ops->name)) {
 		err = -EEXIST;
@@ -10369,7 +10368,7 @@ void devlink_region_destroy(struct devlink_region *region)
 	struct devlink *devlink = region->devlink;
 	struct devlink_snapshot *snapshot, *ts;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 
 	/* Free all snapshots of region */
 	list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
@@ -10402,7 +10401,7 @@ int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
 {
 	int err;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	err = __devlink_region_snapshot_id_get(devlink, id);
 	mutex_unlock(&devlink->lock);
 
@@ -10422,7 +10421,7 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
  */
 void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
 {
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	__devlink_snapshot_id_decrement(devlink, id);
 	mutex_unlock(&devlink->lock);
 }
@@ -10446,7 +10445,7 @@ int devlink_region_snapshot_create(struct devlink_region *region,
 	struct devlink *devlink = region->devlink;
 	int err;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	err = __devlink_region_snapshot_create(region, data, snapshot_id);
 	mutex_unlock(&devlink->lock);
 
@@ -10831,7 +10830,7 @@ int devlink_traps_register(struct devlink *devlink,
 	if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
 		return -EINVAL;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	for (i = 0; i < traps_count; i++) {
 		const struct devlink_trap *trap = &traps[i];
 
@@ -10868,7 +10867,7 @@ void devlink_traps_unregister(struct devlink *devlink,
 {
 	int i;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	/* Make sure we do not have any packets in-flight while unregistering
 	 * traps by disabling all of them and waiting for a grace period.
 	 */
@@ -11049,7 +11048,7 @@ int devlink_trap_groups_register(struct devlink *devlink,
 {
 	int i, err;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	for (i = 0; i < groups_count; i++) {
 		const struct devlink_trap_group *group = &groups[i];
 
@@ -11086,7 +11085,7 @@ void devlink_trap_groups_unregister(struct devlink *devlink,
 {
 	int i;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	for (i = groups_count - 1; i >= 0; i--)
 		devlink_trap_group_unregister(devlink, &groups[i]);
 	mutex_unlock(&devlink->lock);
@@ -11189,7 +11188,7 @@ devlink_trap_policers_register(struct devlink *devlink,
 {
 	int i, err;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	for (i = 0; i < policers_count; i++) {
 		const struct devlink_trap_policer *policer = &policers[i];
 
@@ -11230,7 +11229,7 @@ devlink_trap_policers_unregister(struct devlink *devlink,
 {
 	int i;
 
-	mutex_lock(&devlink->lock);
+	mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
 	for (i = policers_count - 1; i >= 0; i--)
 		devlink_trap_policer_unregister(devlink, &policers[i]);
 	mutex_unlock(&devlink->lock);
@@ -11400,6 +11399,8 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 		if (!net_eq(devlink_net(devlink), net))
 			goto retry;
 
+		mutex_lock_nested(&devlink->lock, SINGLE_DEPTH_NESTING);
+
 		WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
 		err = devlink_reload(devlink, &init_net,
 				     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
@@ -11407,6 +11408,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 				     &actions_performed, NULL);
 		if (err && err != -EOPNOTSUPP)
 			pr_warn("Failed to reload devlink instance into init_net\n");
+		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
 	}
-- 
2.31.1


Thread overview: 42+ messages
2021-10-31 17:35 Leon Romanovsky [this message]
2021-11-01  7:12 ` [PATCH net-next] devlink: Require devlink lock during device reload Ido Schimmel
2021-11-01 15:03 ` Jiri Pirko
2021-11-01 20:52   ` Ido Schimmel
2021-11-01 23:11     ` Jakub Kicinski
2021-11-07 17:16       ` Ido Schimmel
2021-11-07 17:54         ` Leon Romanovsky
2021-11-08 16:09           ` Jakub Kicinski
2021-11-08 17:32             ` Leon Romanovsky
2021-11-08 18:16               ` Jakub Kicinski
2021-11-08 18:24                 ` Leon Romanovsky
2021-11-08 18:46                   ` Jakub Kicinski
2021-11-08 19:58                     ` Leon Romanovsky
2021-11-08 23:31                       ` Jakub Kicinski
2021-11-09 14:12                         ` Leon Romanovsky
2021-11-09 14:17                           ` Jakub Kicinski
2021-11-09 14:30                             ` Leon Romanovsky
2021-11-09 14:49                               ` Jakub Kicinski
2021-11-09 16:29                           ` Jiri Pirko
2021-11-09 14:43                         ` Jason Gunthorpe
2021-11-09 15:07                           ` Jakub Kicinski
2021-11-09 15:33                             ` Jason Gunthorpe
2021-11-09 16:20                               ` Jakub Kicinski
2021-11-09 18:24                                 ` Jason Gunthorpe
2021-11-11 12:05                                   ` Jiri Pirko
2021-11-11 12:17                                     ` Leon Romanovsky
2021-11-12  7:38                                       ` Jiri Pirko
2021-11-14  6:19                                         ` Leon Romanovsky
2021-11-15 11:20                                           ` Jiri Pirko
2021-11-15 12:53                                             ` Jason Gunthorpe
2021-11-15 14:42                                               ` Jiri Pirko
2021-11-15 15:09                                                 ` Jason Gunthorpe
2021-11-15 15:22                                                   ` Jakub Kicinski
2021-11-16  7:00                                                     ` Jiri Pirko
2021-11-16 13:45                                                       ` Jakub Kicinski
2021-11-16  6:57                                                   ` Jiri Pirko
2021-11-16 12:44                                                     ` Jason Gunthorpe
2021-11-17 14:15                                                       ` Leon Romanovsky
2021-11-10  7:52                                 ` Leon Romanovsky
2021-11-09 16:15                             ` Jiri Pirko
2021-11-09 16:26                               ` Jakub Kicinski
2021-11-09 16:30                                 ` Jiri Pirko
