All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch net-next RFC 0/2] net: devlink: remove devlink big lock
@ 2022-06-27 13:54 Jiri Pirko
  2022-06-27 13:55 ` [patch net-next RFC 1/2] net: devlink: make sure that devlink_try_get() works with valid pointer during xarray iteration Jiri Pirko
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Jiri Pirko @ 2022-06-27 13:54 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, idosch, petrm, pabeni, edumazet, mlxsw, saeedm

From: Jiri Pirko <jiri@nvidia.com>

This is an attempt to remove use of devlink_mutex. This is a global lock
taken for every user command. That causes that long operations performed
on one devlink instance (like flash update) are blocking other
operations on different instances.

The first patch makes sure that the xarray that holds devlink pointers
is possible to be safely iterated.

The second patch moves the user command mutex to be per-devlink.

Jiri Pirko (2):
  net: devlink: make sure that devlink_try_get() works with valid
    pointer during xarray iteration
  net: devlink: replace devlink_mutex by per-devlink lock

 net/core/devlink.c | 256 ++++++++++++++++++++++++++++-----------------
 1 file changed, 161 insertions(+), 95 deletions(-)

-- 
2.35.3


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch net-next RFC 1/2] net: devlink: make sure that devlink_try_get() works with valid pointer during xarray iteration
  2022-06-27 13:54 [patch net-next RFC 0/2] net: devlink: remove devlink big lock Jiri Pirko
@ 2022-06-27 13:55 ` Jiri Pirko
  2022-06-27 13:55 ` [patch net-next RFC 2/2] net: devlink: replace devlink_mutex by per-devlink lock Jiri Pirko
  2022-06-27 15:41 ` [patch net-next RFC 0/2] net: devlink: remove devlink big lock Ido Schimmel
  2 siblings, 0 replies; 13+ messages in thread
From: Jiri Pirko @ 2022-06-27 13:55 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, idosch, petrm, pabeni, edumazet, mlxsw, saeedm

From: Jiri Pirko <jiri@nvidia.com>

Remove dependency on devlink_mutex during devlinks xarray iteration.

The devlinks xarray consistency is ensured internally by xarray.
There is a reference taken when working with devlink using
devlink_try_get(). But there is no guarantee that devlink pointer
picked during xarray iteration is not freed before devlink_try_get()
is called.

Make sure that devlink_try_get() works with valid pointer.
Achieve it by:
1) Splitting devlink_put() so the completion is sent only
   after the grace period. The completion unblocks the
   devlink_unregister() routine, which is followed up by devlink_free()
2) Iterate the devlink xarray holding RCU read lock.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
 net/core/devlink.c | 113 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 95 insertions(+), 18 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index db61f3a341cb..9e34b4b9b19b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -69,6 +69,7 @@ struct devlink {
 	u8 reload_failed:1;
 	refcount_t refcount;
 	struct completion comp;
+	struct rcu_head rcu;
 	char priv[] __aligned(NETDEV_ALIGN);
 };
 
@@ -220,8 +221,6 @@ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
 /* devlink_mutex
  *
  * An overall lock guarding every operation coming from userspace.
- * It also guards devlink devices list and it is taken when
- * driver registers/unregisters it.
  */
 static DEFINE_MUTEX(devlink_mutex);
 
@@ -231,10 +230,21 @@ struct net *devlink_net(const struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_net);
 
+static void __devlink_put_rcu(struct rcu_head *head)
+{
+	struct devlink *devlink = container_of(head, struct devlink, rcu);
+
+	complete(&devlink->comp);
+}
+
 void devlink_put(struct devlink *devlink)
 {
 	if (refcount_dec_and_test(&devlink->refcount))
-		complete(&devlink->comp);
+		/* Make sure unregister operation that may await the completion
+		 * is unblocked only after all users are past the end of the
+		 * RCU grace period.
+		 */
+		call_rcu(&devlink->rcu, __devlink_put_rcu);
 }
 
 struct devlink *__must_check devlink_try_get(struct devlink *devlink)
@@ -288,6 +298,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
 
 	lockdep_assert_held(&devlink_mutex);
 
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (strcmp(devlink->dev->bus->name, busname) == 0 &&
 		    strcmp(dev_name(devlink->dev), devname) == 0 &&
@@ -299,6 +310,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
 
 	if (!found || !devlink_try_get(devlink))
 		devlink = ERR_PTR(-ENODEV);
+	rcu_read_unlock();
 
 	return devlink;
 }
@@ -1322,9 +1334,11 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
 	int err = 0;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -1351,7 +1365,9 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 	if (err != -EMSGSIZE)
@@ -1425,29 +1441,32 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
-		if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) {
-			devlink_put(devlink);
-			continue;
-		}
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			goto retry;
 
-		if (idx < start) {
-			idx++;
-			devlink_put(devlink);
-			continue;
-		}
+		if (idx < start)
+			goto inc;
 
 		err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
 				      NETLINK_CB(cb->skb).portid,
 				      cb->nlh->nlmsg_seq, NLM_F_MULTI);
-		devlink_put(devlink);
-		if (err)
+		if (err) {
+			devlink_put(devlink);
 			goto out;
+		}
+inc:
 		idx++;
+retry:
+		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -1488,9 +1507,11 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -1516,7 +1537,9 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -2173,9 +2196,11 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -2204,7 +2229,9 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->linecards_lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -2445,9 +2472,11 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -2473,7 +2502,9 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -2597,9 +2628,11 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
 	int err = 0;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
 		    !devlink->ops->sb_pool_get)
@@ -2622,7 +2655,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -2818,9 +2853,11 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
 	int err = 0;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
 		    !devlink->ops->sb_port_pool_get)
@@ -2843,7 +2880,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -3067,9 +3106,11 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
 	int err = 0;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
 		    !devlink->ops->sb_tc_pool_bind_get)
@@ -3093,7 +3134,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -5154,9 +5197,11 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
 	int err = 0;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -5184,7 +5229,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -5389,9 +5436,11 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
 	int err = 0;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -5424,7 +5473,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -5973,9 +6024,11 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
 	int err = 0;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -5986,7 +6039,9 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
 		devlink_put(devlink);
 		if (err)
 			goto out;
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 	cb->args[0] = idx;
@@ -6507,9 +6562,11 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
 	int err = 0;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -6533,7 +6590,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
 		idx++;
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 	mutex_unlock(&devlink_mutex);
 
 	if (err != -EMSGSIZE)
@@ -7687,9 +7746,11 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry_rep;
@@ -7715,11 +7776,13 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->reporters_lock);
 retry_rep:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
 
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry_port;
@@ -7750,7 +7813,9 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry_port:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -8287,9 +8352,11 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -8315,7 +8382,9 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -8514,9 +8583,11 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -8543,7 +8614,9 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -8828,9 +8901,11 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
 	int err;
 
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
@@ -8857,7 +8932,9 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
 		mutex_unlock(&devlink->lock);
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 out:
 	mutex_unlock(&devlink_mutex);
 
@@ -9585,10 +9662,8 @@ void devlink_register(struct devlink *devlink)
 	ASSERT_DEVLINK_NOT_REGISTERED(devlink);
 	/* Make sure that we are in .probe() routine */
 
-	mutex_lock(&devlink_mutex);
 	xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
 	devlink_notify_register(devlink);
-	mutex_unlock(&devlink_mutex);
 }
 EXPORT_SYMBOL_GPL(devlink_register);
 
@@ -9605,10 +9680,8 @@ void devlink_unregister(struct devlink *devlink)
 	devlink_put(devlink);
 	wait_for_completion(&devlink->comp);
 
-	mutex_lock(&devlink_mutex);
 	devlink_notify_unregister(devlink);
 	xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
-	mutex_unlock(&devlink_mutex);
 }
 EXPORT_SYMBOL_GPL(devlink_unregister);
 
@@ -12118,9 +12191,11 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 	 * all devlink instances from this namespace into init_net.
 	 */
 	mutex_lock(&devlink_mutex);
+	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
 			continue;
+		rcu_read_unlock();
 
 		if (!net_eq(devlink_net(devlink), net))
 			goto retry;
@@ -12134,7 +12209,9 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 			pr_warn("Failed to reload devlink instance into init_net\n");
 retry:
 		devlink_put(devlink);
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 	mutex_unlock(&devlink_mutex);
 }
 
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [patch net-next RFC 2/2] net: devlink: replace devlink_mutex by per-devlink lock
  2022-06-27 13:54 [patch net-next RFC 0/2] net: devlink: remove devlink big lock Jiri Pirko
  2022-06-27 13:55 ` [patch net-next RFC 1/2] net: devlink: make sure that devlink_try_get() works with valid pointer during xarray iteration Jiri Pirko
@ 2022-06-27 13:55 ` Jiri Pirko
  2022-06-27 15:41 ` [patch net-next RFC 0/2] net: devlink: remove devlink big lock Ido Schimmel
  2 siblings, 0 replies; 13+ messages in thread
From: Jiri Pirko @ 2022-06-27 13:55 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, idosch, petrm, pabeni, edumazet, mlxsw, saeedm

From: Jiri Pirko <jiri@nvidia.com>

Currently devlink_mutex enforces user commands to not run in parallel.
However not only per-devlink instance, but globally.

Break this big lock into per-devlink instance mutex.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
 net/core/devlink.c | 143 +++++++++++++++++++++------------------------
 1 file changed, 66 insertions(+), 77 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 9e34b4b9b19b..966749aa3158 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -66,6 +66,8 @@ struct devlink {
 	 * port, sb, dpipe, resource, params, region, traps and more.
 	 */
 	struct mutex lock;
+	/* Makes sure only one user cmd is in execution at a time. */
+	struct mutex cmd_mutex;
 	u8 reload_failed:1;
 	refcount_t refcount;
 	struct completion comp;
@@ -218,12 +220,6 @@ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
 #define ASSERT_DEVLINK_NOT_REGISTERED(d)                                       \
 	WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
 
-/* devlink_mutex
- *
- * An overall lock guarding every operation coming from userspace.
- */
-static DEFINE_MUTEX(devlink_mutex);
-
 struct net *devlink_net(const struct devlink *devlink)
 {
 	return read_pnet(&devlink->_net);
@@ -296,8 +292,6 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
 	busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
 	devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
 
-	lockdep_assert_held(&devlink_mutex);
-
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (strcmp(devlink->dev->bus->name, busname) == 0 &&
@@ -703,8 +697,8 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
 #define DEVLINK_NL_FLAG_NEED_LINECARD		BIT(4)
 
 /* The per devlink instance lock is taken by default in the pre-doit
- * operation, yet several commands do not require this. The global
- * devlink lock is taken and protects from disruption by user-calls.
+ * operation, yet several commands do not require this. The devlink
+ * command mutex is taken and protects from disruption by user-calls.
  */
 #define DEVLINK_NL_FLAG_NO_LOCK			BIT(5)
 
@@ -716,12 +710,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 	struct devlink *devlink;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs);
-	if (IS_ERR(devlink)) {
-		mutex_unlock(&devlink_mutex);
+	if (IS_ERR(devlink))
 		return PTR_ERR(devlink);
-	}
+	mutex_lock(&devlink->cmd_mutex);
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_lock(&devlink->lock);
 	info->user_ptr[0] = devlink;
@@ -767,8 +759,8 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 unlock:
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink->cmd_mutex);
 	devlink_put(devlink);
-	mutex_unlock(&devlink_mutex);
 	return err;
 }
 
@@ -785,8 +777,8 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
 	}
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink->cmd_mutex);
 	devlink_put(devlink);
-	mutex_unlock(&devlink_mutex);
 }
 
 static struct genl_family devlink_nl_family;
@@ -1333,7 +1325,6 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -1343,6 +1334,7 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
 			enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
@@ -1357,19 +1349,20 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
 						   NLM_F_MULTI, NULL);
 			if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
 	if (err != -EMSGSIZE)
 		return err;
 
@@ -1440,7 +1433,6 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -1453,9 +1445,11 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
 		if (idx < start)
 			goto inc;
 
+		mutex_lock(&devlink->cmd_mutex);
 		err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
 				      NETLINK_CB(cb->skb).portid,
 				      cb->nlh->nlmsg_seq, NLM_F_MULTI);
+		mutex_unlock(&devlink->cmd_mutex);
 		if (err) {
 			devlink_put(devlink);
 			goto out;
@@ -1468,8 +1462,6 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -1506,7 +1498,6 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -1516,6 +1507,7 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_port, &devlink->port_list, list) {
 			if (idx < start) {
@@ -1529,20 +1521,20 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
 						   NLM_F_MULTI, cb->extack);
 			if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -2195,7 +2187,6 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -2205,6 +2196,7 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->linecards_lock);
 		list_for_each_entry(linecard, &devlink->linecard_list, list) {
 			if (idx < start) {
@@ -2221,20 +2213,20 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
 			mutex_unlock(&linecard->state_lock);
 			if (err) {
 				mutex_unlock(&devlink->linecards_lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->linecards_lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -2471,7 +2463,6 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -2481,6 +2472,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
 			if (idx < start) {
@@ -2494,20 +2486,20 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
 						 NLM_F_MULTI);
 			if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -2627,7 +2619,6 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -2638,6 +2629,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
 		    !devlink->ops->sb_pool_get)
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
 			err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
@@ -2648,19 +2640,19 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
 				err = 0;
 			} else if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	if (err != -EMSGSIZE)
 		return err;
 
@@ -2852,7 +2844,6 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -2863,6 +2854,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
 		    !devlink->ops->sb_port_pool_get)
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
 			err = __sb_port_pool_get_dumpit(msg, start, &idx,
@@ -2873,19 +2865,19 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
 				err = 0;
 			} else if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	if (err != -EMSGSIZE)
 		return err;
 
@@ -3105,7 +3097,6 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -3116,6 +3107,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
 		    !devlink->ops->sb_tc_pool_bind_get)
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
 			err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
@@ -3127,19 +3119,19 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
 				err = 0;
 			} else if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	if (err != -EMSGSIZE)
 		return err;
 
@@ -5196,7 +5188,6 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -5206,6 +5197,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(param_item, &devlink->param_list, list) {
 			if (idx < start) {
@@ -5221,20 +5213,20 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
 				err = 0;
 			} else if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	if (err != -EMSGSIZE)
 		return err;
 
@@ -5435,7 +5427,6 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -5445,6 +5436,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_port, &devlink->port_list, list) {
 			list_for_each_entry(param_item,
@@ -5464,6 +5456,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
 					err = 0;
 				} else if (err) {
 					mutex_unlock(&devlink->lock);
+					mutex_unlock(&devlink->cmd_mutex);
 					devlink_put(devlink);
 					goto out;
 				}
@@ -5471,14 +5464,13 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
 			}
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	if (err != -EMSGSIZE)
 		return err;
 
@@ -6023,7 +6015,6 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -6033,8 +6024,10 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
 							       &idx, start);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		if (err)
@@ -6043,7 +6036,6 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -6297,13 +6289,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 
 	start_offset = *((u64 *)&cb->args[0]);
 
-	mutex_lock(&devlink_mutex);
 	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
-	if (IS_ERR(devlink)) {
-		err = PTR_ERR(devlink);
-		goto out_dev;
-	}
+	if (IS_ERR(devlink))
+		return PTR_ERR(devlink);
 
+	mutex_lock(&devlink->cmd_mutex);
 	mutex_lock(&devlink->lock);
 
 	if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
@@ -6401,8 +6391,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 	nla_nest_end(skb, chunks_attr);
 	genlmsg_end(skb, hdr);
 	mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink->cmd_mutex);
 	devlink_put(devlink);
-	mutex_unlock(&devlink_mutex);
 
 	return skb->len;
 
@@ -6410,9 +6400,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 	genlmsg_cancel(skb, hdr);
 out_unlock:
 	mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink->cmd_mutex);
 	devlink_put(devlink);
-out_dev:
-	mutex_unlock(&devlink_mutex);
 	return err;
 }
 
@@ -6561,7 +6550,6 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err = 0;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -6574,12 +6562,14 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
 		if (idx < start || !devlink->ops->info_get)
 			goto inc;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					   cb->extack);
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 		if (err == -EOPNOTSUPP)
 			err = 0;
 		else if (err) {
@@ -6593,7 +6583,6 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
-	mutex_unlock(&devlink_mutex);
 
 	if (err != -EMSGSIZE)
 		return err;
@@ -7668,18 +7657,13 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
 	struct nlattr **attrs = info->attrs;
 	struct devlink *devlink;
 
-	mutex_lock(&devlink_mutex);
 	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
 	if (IS_ERR(devlink))
-		goto unlock;
+		return NULL;
 
 	reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
 	devlink_put(devlink);
-	mutex_unlock(&devlink_mutex);
 	return reporter;
-unlock:
-	mutex_unlock(&devlink_mutex);
-	return NULL;
 }
 
 void
@@ -7745,7 +7729,6 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -7755,6 +7738,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry_rep;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->reporters_lock);
 		list_for_each_entry(reporter, &devlink->reporter_list,
 				    list) {
@@ -7768,12 +7752,14 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 				NLM_F_MULTI);
 			if (err) {
 				mutex_unlock(&devlink->reporters_lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->reporters_lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry_rep:
 		devlink_put(devlink);
 		rcu_read_lock();
@@ -7787,6 +7773,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry_port;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(port, &devlink->port_list, list) {
 			mutex_lock(&port->reporters_lock);
@@ -7803,6 +7790,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 				if (err) {
 					mutex_unlock(&port->reporters_lock);
 					mutex_unlock(&devlink->lock);
+					mutex_unlock(&devlink->cmd_mutex);
 					devlink_put(devlink);
 					goto out;
 				}
@@ -7811,14 +7799,13 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
 			mutex_unlock(&port->reporters_lock);
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry_port:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -8351,7 +8338,6 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -8361,6 +8347,7 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(trap_item, &devlink->trap_list, list) {
 			if (idx < start) {
@@ -8374,20 +8361,20 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
 						   NLM_F_MULTI);
 			if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -8582,7 +8569,6 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -8592,6 +8578,7 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(group_item, &devlink->trap_group_list,
 				    list) {
@@ -8606,20 +8593,20 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
 							 NLM_F_MULTI);
 			if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -8900,7 +8887,6 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -8910,6 +8896,7 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
 		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
 			goto retry;
 
+		mutex_lock(&devlink->cmd_mutex);
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(policer_item, &devlink->trap_policer_list,
 				    list) {
@@ -8924,20 +8911,20 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
 							   NLM_F_MULTI);
 			if (err) {
 				mutex_unlock(&devlink->lock);
+				mutex_unlock(&devlink->cmd_mutex);
 				devlink_put(devlink);
 				goto out;
 			}
 			idx++;
 		}
 		mutex_unlock(&devlink->lock);
+		mutex_unlock(&devlink->cmd_mutex);
 retry:
 		devlink_put(devlink);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
 out:
-	mutex_unlock(&devlink_mutex);
-
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -9555,6 +9542,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
 	INIT_LIST_HEAD(&devlink->trap_group_list);
 	INIT_LIST_HEAD(&devlink->trap_policer_list);
 	mutex_init(&devlink->lock);
+	mutex_init(&devlink->cmd_mutex);
 	mutex_init(&devlink->reporters_lock);
 	mutex_init(&devlink->linecards_lock);
 	refcount_set(&devlink->refcount, 1);
@@ -9696,6 +9684,7 @@ void devlink_free(struct devlink *devlink)
 
 	mutex_destroy(&devlink->linecards_lock);
 	mutex_destroy(&devlink->reporters_lock);
+	mutex_destroy(&devlink->cmd_mutex);
 	mutex_destroy(&devlink->lock);
 	WARN_ON(!list_empty(&devlink->trap_policer_list));
 	WARN_ON(!list_empty(&devlink->trap_group_list));
@@ -12190,7 +12179,6 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 	/* In case network namespace is getting destroyed, reload
 	 * all devlink instances from this namespace into init_net.
 	 */
-	mutex_lock(&devlink_mutex);
 	rcu_read_lock();
 	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
 		if (!devlink_try_get(devlink))
@@ -12201,10 +12189,12 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 			goto retry;
 
 		WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
+		mutex_lock(&devlink->cmd_mutex);
 		err = devlink_reload(devlink, &init_net,
 				     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
 				     DEVLINK_RELOAD_LIMIT_UNSPEC,
 				     &actions_performed, NULL);
+		mutex_unlock(&devlink->cmd_mutex);
 		if (err && err != -EOPNOTSUPP)
 			pr_warn("Failed to reload devlink instance into init_net\n");
 retry:
@@ -12212,7 +12202,6 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
-	mutex_unlock(&devlink_mutex);
 }
 
 static struct pernet_operations devlink_pernet_ops __net_initdata = {
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-27 13:54 [patch net-next RFC 0/2] net: devlink: remove devlink big lock Jiri Pirko
  2022-06-27 13:55 ` [patch net-next RFC 1/2] net: devlink: make sure that devlink_try_get() works with valid pointer during xarray iteration Jiri Pirko
  2022-06-27 13:55 ` [patch net-next RFC 2/2] net: devlink: replace devlink_mutex by per-devlink lock Jiri Pirko
@ 2022-06-27 15:41 ` Ido Schimmel
  2022-06-27 15:55   ` Jiri Pirko
  2022-06-27 17:49   ` Jakub Kicinski
  2 siblings, 2 replies; 13+ messages in thread
From: Ido Schimmel @ 2022-06-27 15:41 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, kuba, petrm, pabeni, edumazet, mlxsw, saeedm

On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@nvidia.com>
> 
> This is an attempt to remove use of devlink_mutex. This is a global lock
> taken for every user command. That causes that long operations performed
> on one devlink instance (like flash update) are blocking other
> operations on different instances.

This patchset is supposed to prevent one devlink instance from blocking
another? Devlink does not enable "parallel_ops", which means that the
generic netlink mutex is serializing all user space operations. AFAICT,
this series does not enable "parallel_ops", so I'm not sure what
difference the removal of the devlink mutex makes.

The devlink mutex (in accordance with the comment above it) serializes
all user space operations and accesses to the devlink devices list. This
resulted in a AA deadlock in the previous submission because we had a
flow where a user space operation (which acquires this mutex) also tries
to register / unregister a nested devlink instance which also tries to
acquire the mutex.

As long as devlink does not implement "parallel_ops", it seems that the
devlink mutex can be reduced to only serializing accesses to the devlink
devices list, thereby eliminating the deadlock.

> 
> The first patch makes sure that the xarray that holds devlink pointers
> is possible to be safely iterated.
> 
> The second patch moves the user command mutex to be per-devlink.
> 
> Jiri Pirko (2):
>   net: devlink: make sure that devlink_try_get() works with valid
>     pointer during xarray iteration
>   net: devlink: replace devlink_mutex by per-devlink lock
> 
>  net/core/devlink.c | 256 ++++++++++++++++++++++++++++-----------------
>  1 file changed, 161 insertions(+), 95 deletions(-)
> 
> -- 
> 2.35.3
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-27 15:41 ` [patch net-next RFC 0/2] net: devlink: remove devlink big lock Ido Schimmel
@ 2022-06-27 15:55   ` Jiri Pirko
  2022-06-28  7:43     ` Ido Schimmel
  2022-06-27 17:49   ` Jakub Kicinski
  1 sibling, 1 reply; 13+ messages in thread
From: Jiri Pirko @ 2022-06-27 15:55 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, davem, kuba, petrm, pabeni, edumazet, mlxsw, saeedm

Mon, Jun 27, 2022 at 05:41:31PM CEST, idosch@nvidia.com wrote:
>On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@nvidia.com>
>> 
>> This is an attempt to remove use of devlink_mutex. This is a global lock
>> taken for every user command. That causes that long operations performed
>> on one devlink instance (like flash update) are blocking other
>> operations on different instances.
>
>This patchset is supposed to prevent one devlink instance from blocking
>another? Devlink does not enable "parallel_ops", which means that the
>generic netlink mutex is serializing all user space operations. AFAICT,
>this series does not enable "parallel_ops", so I'm not sure what
>difference the removal of the devlink mutex makes.

You are correct, that is missing. For me, as a side effect this patchset
resolved the deadlock for LC auxdev you pointed out. That was my
motivation for this patchset :)


>
>The devlink mutex (in accordance with the comment above it) serializes
>all user space operations and accesses to the devlink devices list. This
>resulted in a AA deadlock in the previous submission because we had a
>flow where a user space operation (which acquires this mutex) also tries
>to register / unregister a nested devlink instance which also tries to
>acquire the mutex.
>
>As long as devlink does not implement "parallel_ops", it seems that the
>devlink mutex can be reduced to only serializing accesses to the devlink
>devices list, thereby eliminating the deadlock.
>
>> 
>> The first patch makes sure that the xarray that holds devlink pointers
>> is possible to be safely iterated.
>> 
>> The second patch moves the user command mutex to be per-devlink.
>> 
>> Jiri Pirko (2):
>>   net: devlink: make sure that devlink_try_get() works with valid
>>     pointer during xarray iteration
>>   net: devlink: replace devlink_mutex by per-devlink lock
>> 
>>  net/core/devlink.c | 256 ++++++++++++++++++++++++++++-----------------
>>  1 file changed, 161 insertions(+), 95 deletions(-)
>> 
>> -- 
>> 2.35.3
>> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-27 15:41 ` [patch net-next RFC 0/2] net: devlink: remove devlink big lock Ido Schimmel
  2022-06-27 15:55   ` Jiri Pirko
@ 2022-06-27 17:49   ` Jakub Kicinski
  2022-06-28  6:32     ` Jiri Pirko
  1 sibling, 1 reply; 13+ messages in thread
From: Jakub Kicinski @ 2022-06-27 17:49 UTC (permalink / raw)
  To: Ido Schimmel, Jiri Pirko
  Cc: netdev, davem, petrm, pabeni, edumazet, mlxsw, saeedm

On Mon, 27 Jun 2022 18:41:31 +0300 Ido Schimmel wrote:
> On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
> > This is an attempt to remove use of devlink_mutex. This is a global lock
> > taken for every user command. That causes that long operations performed
> > on one devlink instance (like flash update) are blocking other
> > operations on different instances.  
> 
> This patchset is supposed to prevent one devlink instance from blocking
> another? Devlink does not enable "parallel_ops", which means that the
> generic netlink mutex is serializing all user space operations. AFAICT,
> this series does not enable "parallel_ops", so I'm not sure what
> difference the removal of the devlink mutex makes.
> 
> The devlink mutex (in accordance with the comment above it) serializes
> all user space operations and accesses to the devlink devices list. This
> resulted in a AA deadlock in the previous submission because we had a
> flow where a user space operation (which acquires this mutex) also tries
> to register / unregister a nested devlink instance which also tries to
> acquire the mutex.
> 
> As long as devlink does not implement "parallel_ops", it seems that the
> devlink mutex can be reduced to only serializing accesses to the devlink
> devices list, thereby eliminating the deadlock.

I'm unclear on why we can't wait for mlx5 locking rework which will
allow us to move completely to per-instance locks. Do you have extra
insights into how that work is progressing? I was hoping that it will
be complete in the next two months. 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-27 17:49   ` Jakub Kicinski
@ 2022-06-28  6:32     ` Jiri Pirko
  2022-06-28  7:04       ` Jiri Pirko
  0 siblings, 1 reply; 13+ messages in thread
From: Jiri Pirko @ 2022-06-28  6:32 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Ido Schimmel, netdev, davem, petrm, pabeni, edumazet, mlxsw, saeedm

Mon, Jun 27, 2022 at 07:49:45PM CEST, kuba@kernel.org wrote:
>On Mon, 27 Jun 2022 18:41:31 +0300 Ido Schimmel wrote:
>> On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
>> > This is an attempt to remove use of devlink_mutex. This is a global lock
>> > taken for every user command. That causes that long operations performed
>> > on one devlink instance (like flash update) are blocking other
>> > operations on different instances.  
>> 
>> This patchset is supposed to prevent one devlink instance from blocking
>> another? Devlink does not enable "parallel_ops", which means that the
>> generic netlink mutex is serializing all user space operations. AFAICT,
>> this series does not enable "parallel_ops", so I'm not sure what
>> difference the removal of the devlink mutex makes.
>> 
>> The devlink mutex (in accordance with the comment above it) serializes
>> all user space operations and accesses to the devlink devices list. This
>> resulted in a AA deadlock in the previous submission because we had a
>> flow where a user space operation (which acquires this mutex) also tries
>> to register / unregister a nested devlink instance which also tries to
>> acquire the mutex.
>> 
>> As long as devlink does not implement "parallel_ops", it seems that the
>> devlink mutex can be reduced to only serializing accesses to the devlink
>> devices list, thereby eliminating the deadlock.
>
>I'm unclear on why we can't wait for mlx5 locking rework which will

Sure we can, no rush.

>allow us to move completely to per-instance locks. Do you have extra
>insights into how that work is progressing? I was hoping that it will

It's under internal review afaik.

>be complete in the next two months. 

What do you mean exactly? Is it that we would be okay just with
devlink->lock? I don't think so. We need a user lock because we can't take
devlink->lock for port split and reload. devlink_mutex protects that now;
the devlink->cmd_lock I'm introducing here just replaces devlink_mutex.
If we can do without it, that is fine. I just can't see how.
Also, I don't see the relation to mlx5 work. What is that?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-28  6:32     ` Jiri Pirko
@ 2022-06-28  7:04       ` Jiri Pirko
  0 siblings, 0 replies; 13+ messages in thread
From: Jiri Pirko @ 2022-06-28  7:04 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Ido Schimmel, netdev, davem, petrm, pabeni, edumazet, mlxsw, saeedm

Tue, Jun 28, 2022 at 08:32:49AM CEST, jiri@resnulli.us wrote:
>Mon, Jun 27, 2022 at 07:49:45PM CEST, kuba@kernel.org wrote:
>>On Mon, 27 Jun 2022 18:41:31 +0300 Ido Schimmel wrote:
>>> On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
>>> > This is an attempt to remove use of devlink_mutex. This is a global lock
>>> > taken for every user command. That causes that long operations performed
>>> > on one devlink instance (like flash update) are blocking other
>>> > operations on different instances.  
>>> 
>>> This patchset is supposed to prevent one devlink instance from blocking
>>> another? Devlink does not enable "parallel_ops", which means that the
>>> generic netlink mutex is serializing all user space operations. AFAICT,
>>> this series does not enable "parallel_ops", so I'm not sure what
>>> difference the removal of the devlink mutex makes.
>>> 
>>> The devlink mutex (in accordance with the comment above it) serializes
>>> all user space operations and accesses to the devlink devices list. This
>>> resulted in a AA deadlock in the previous submission because we had a
>>> flow where a user space operation (which acquires this mutex) also tries
>>> to register / unregister a nested devlink instance which also tries to
>>> acquire the mutex.
>>> 
>>> As long as devlink does not implement "parallel_ops", it seems that the
>>> devlink mutex can be reduced to only serializing accesses to the devlink
>>> devices list, thereby eliminating the deadlock.
>>
>>I'm unclear on why we can't wait for mlx5 locking rework which will
>
>Sure we can, no rush.
>
>>allow us to move completely to per-instance locks. Do you have extra
>>insights into how that work is progressing? I was hoping that it will
>
>It's under internal review afaik.
>
>>be complete in the next two months. 
>
>What do you mean exactly? Is that that we would be okay just with
>devlink->lock? I don't think so. We need user lock because we can't take
>devlink->lock for port split and reload. devlink_mutex protects that now,

Okay, I take back port split, that is already fixed.
Moshe is taking care of the rest (port_new/del, reporter_*). I will
check out the reload. Once we have that, you are correct, we are fine
with the devlink->lock instance lock.

Thanks!


>the devlink->cmd_lock I'm introducing here just replaces devlink_mutex.
>If we can do without, that is fine. I just can't see how.
>Also, I don't see the relation to mlx5 work. What is that?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-27 15:55   ` Jiri Pirko
@ 2022-06-28  7:43     ` Ido Schimmel
  2022-06-29 10:25       ` Jiri Pirko
  0 siblings, 1 reply; 13+ messages in thread
From: Ido Schimmel @ 2022-06-28  7:43 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, kuba, petrm, pabeni, edumazet, mlxsw, saeedm

On Mon, Jun 27, 2022 at 05:55:06PM +0200, Jiri Pirko wrote:
> Mon, Jun 27, 2022 at 05:41:31PM CEST, idosch@nvidia.com wrote:
> >On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
> >> From: Jiri Pirko <jiri@nvidia.com>
> >> 
> >> This is an attempt to remove use of devlink_mutex. This is a global lock
> >> taken for every user command. That causes that long operations performed
> >> on one devlink instance (like flash update) are blocking other
> >> operations on different instances.
> >
> >This patchset is supposed to prevent one devlink instance from blocking
> >another? Devlink does not enable "parallel_ops", which means that the
> >generic netlink mutex is serializing all user space operations. AFAICT,
> >this series does not enable "parallel_ops", so I'm not sure what
> >difference the removal of the devlink mutex makes.
> 
> You are correct, that is missing. For me, as a side effect this patchset
> resolved the deadlock for LC auxdev you pointed out. That was my
> motivation for this patchset :)

Given that devlink does not enable "parallel_ops" and that the generic
netlink mutex is held throughout all callbacks, what prevents you from
simply dropping the devlink mutex now? IOW, why can't this series be
patch #1 and another patch that removes the devlink mutex?

> 
> 
> >
> >The devlink mutex (in accordance with the comment above it) serializes
> >all user space operations and accesses to the devlink devices list. This
> >resulted in a AA deadlock in the previous submission because we had a
> >flow where a user space operation (which acquires this mutex) also tries
> >to register / unregister a nested devlink instance which also tries to
> >acquire the mutex.
> >
> >As long as devlink does not implement "parallel_ops", it seems that the
> >devlink mutex can be reduced to only serializing accesses to the devlink
> >devices list, thereby eliminating the deadlock.
> >
> >> 
> >> The first patch makes sure that the xarray that holds devlink pointers
> >> is possible to be safely iterated.
> >> 
> >> The second patch moves the user command mutex to be per-devlink.
> >> 
> >> Jiri Pirko (2):
> >>   net: devlink: make sure that devlink_try_get() works with valid
> >>     pointer during xarray iteration
> >>   net: devlink: replace devlink_mutex by per-devlink lock
> >> 
> >>  net/core/devlink.c | 256 ++++++++++++++++++++++++++++-----------------
> >>  1 file changed, 161 insertions(+), 95 deletions(-)
> >> 
> >> -- 
> >> 2.35.3
> >> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-28  7:43     ` Ido Schimmel
@ 2022-06-29 10:25       ` Jiri Pirko
  2022-06-29 10:36         ` Jiri Pirko
  0 siblings, 1 reply; 13+ messages in thread
From: Jiri Pirko @ 2022-06-29 10:25 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, davem, kuba, petrm, pabeni, edumazet, mlxsw, saeedm

Tue, Jun 28, 2022 at 09:43:26AM CEST, idosch@nvidia.com wrote:
>On Mon, Jun 27, 2022 at 05:55:06PM +0200, Jiri Pirko wrote:
>> Mon, Jun 27, 2022 at 05:41:31PM CEST, idosch@nvidia.com wrote:
>> >On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
>> >> From: Jiri Pirko <jiri@nvidia.com>
>> >> 
>> >> This is an attempt to remove use of devlink_mutex. This is a global lock
>> >> taken for every user command. That causes that long operations performed
>> >> on one devlink instance (like flash update) are blocking other
>> >> operations on different instances.
>> >
>> >This patchset is supposed to prevent one devlink instance from blocking
>> >another? Devlink does not enable "parallel_ops", which means that the
>> >generic netlink mutex is serializing all user space operations. AFAICT,
>> >this series does not enable "parallel_ops", so I'm not sure what
>> >difference the removal of the devlink mutex makes.
>> 
>> You are correct, that is missing. For me, as a side effect this patchset
>> resolved the deadlock for LC auxdev you pointed out. That was my
>> motivation for this patchset :)
>
>Given that devlink does not enable "parallel_ops" and that the generic
>netlink mutex is held throughout all callbacks, what prevents you from
>simply dropping the devlink mutex now? IOW, why can't this series be
>patch #1 and another patch that removes the devlink mutex?

Yep, I think you are correct. We are currently working with Moshe on
conversion of the commands that do not take devlink->lock (like health
reporters and reload) to take devlink->lock. Once we have that, we can
enable parallel_ops.

>
>> 
>> 
>> >
>> >The devlink mutex (in accordance with the comment above it) serializes
>> >all user space operations and accesses to the devlink devices list. This
>> >resulted in a AA deadlock in the previous submission because we had a
>> >flow where a user space operation (which acquires this mutex) also tries
>> >to register / unregister a nested devlink instance which also tries to
>> >acquire the mutex.
>> >
>> >As long as devlink does not implement "parallel_ops", it seems that the
>> >devlink mutex can be reduced to only serializing accesses to the devlink
>> >devices list, thereby eliminating the deadlock.
>> >
>> >> 
>> >> The first patch makes sure that the xarray that holds devlink pointers
>> >> is possible to be safely iterated.
>> >> 
>> >> The second patch moves the user command mutex to be per-devlink.
>> >> 
>> >> Jiri Pirko (2):
>> >>   net: devlink: make sure that devlink_try_get() works with valid
>> >>     pointer during xarray iteration
>> >>   net: devlink: replace devlink_mutex by per-devlink lock
>> >> 
>> >>  net/core/devlink.c | 256 ++++++++++++++++++++++++++++-----------------
>> >>  1 file changed, 161 insertions(+), 95 deletions(-)
>> >> 
>> >> -- 
>> >> 2.35.3
>> >> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-29 10:25       ` Jiri Pirko
@ 2022-06-29 10:36         ` Jiri Pirko
  2022-06-29 11:30           ` Ido Schimmel
  0 siblings, 1 reply; 13+ messages in thread
From: Jiri Pirko @ 2022-06-29 10:36 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, davem, kuba, petrm, pabeni, edumazet, mlxsw, saeedm

Wed, Jun 29, 2022 at 12:25:49PM CEST, jiri@resnulli.us wrote:
>Tue, Jun 28, 2022 at 09:43:26AM CEST, idosch@nvidia.com wrote:
>>On Mon, Jun 27, 2022 at 05:55:06PM +0200, Jiri Pirko wrote:
>>> Mon, Jun 27, 2022 at 05:41:31PM CEST, idosch@nvidia.com wrote:
>>> >On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
>>> >> From: Jiri Pirko <jiri@nvidia.com>
>>> >> 
>>> >> This is an attempt to remove use of devlink_mutex. This is a global lock
>>> >> taken for every user command. That causes that long operations performed
>>> >> on one devlink instance (like flash update) are blocking other
>>> >> operations on different instances.
>>> >
>>> >This patchset is supposed to prevent one devlink instance from blocking
>>> >another? Devlink does not enable "parallel_ops", which means that the
>>> >generic netlink mutex is serializing all user space operations. AFAICT,
>>> >this series does not enable "parallel_ops", so I'm not sure what
>>> >difference the removal of the devlink mutex makes.
>>> 
>>> You are correct, that is missing. For me, as a side effect this patchset
>>> resolved the deadlock for LC auxdev you pointed out. That was my
>>> motivation for this patchset :)
>>
>>Given that devlink does not enable "parallel_ops" and that the generic
>>netlink mutex is held throughout all callbacks, what prevents you from
>>simply dropping the devlink mutex now? IOW, why can't this series be
>>patch #1 and another patch that removes the devlink mutex?
>
>Yep, I think you are correct. We are currently working with Moshe on

Okay, I see the problem with what you suggested:
devlink_pernet_pre_exit()
There, devlink_mutex is taken to prevent simultaneous commands from
being executed. That will be fixed by the reload conversion to take
devlink->lock.


>conversion of commands that does not late devlink->lock (like health
>reporters and reload) to take devlink->lock. Once we have that, we can
>enable parallel_ops.
>
>>
>>> 
>>> 
>>> >
>>> >The devlink mutex (in accordance with the comment above it) serializes
>>> >all user space operations and accesses to the devlink devices list. This
>>> >resulted in a AA deadlock in the previous submission because we had a
>>> >flow where a user space operation (which acquires this mutex) also tries
>>> >to register / unregister a nested devlink instance which also tries to
>>> >acquire the mutex.
>>> >
>>> >As long as devlink does not implement "parallel_ops", it seems that the
>>> >devlink mutex can be reduced to only serializing accesses to the devlink
>>> >devices list, thereby eliminating the deadlock.
>>> >
>>> >> 
>>> >> The first patch makes sure that the xarray that holds devlink pointers
>>> >> is possible to be safely iterated.
>>> >> 
>>> >> The second patch moves the user command mutex to be per-devlink.
>>> >> 
>>> >> Jiri Pirko (2):
>>> >>   net: devlink: make sure that devlink_try_get() works with valid
>>> >>     pointer during xarray iteration
>>> >>   net: devlink: replace devlink_mutex by per-devlink lock
>>> >> 
>>> >>  net/core/devlink.c | 256 ++++++++++++++++++++++++++++-----------------
>>> >>  1 file changed, 161 insertions(+), 95 deletions(-)
>>> >> 
>>> >> -- 
>>> >> 2.35.3
>>> >> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-29 10:36         ` Jiri Pirko
@ 2022-06-29 11:30           ` Ido Schimmel
  2022-06-29 11:47             ` Jiri Pirko
  0 siblings, 1 reply; 13+ messages in thread
From: Ido Schimmel @ 2022-06-29 11:30 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, kuba, petrm, pabeni, edumazet, mlxsw, saeedm

On Wed, Jun 29, 2022 at 12:36:24PM +0200, Jiri Pirko wrote:
> Wed, Jun 29, 2022 at 12:25:49PM CEST, jiri@resnulli.us wrote:
> >Tue, Jun 28, 2022 at 09:43:26AM CEST, idosch@nvidia.com wrote:
> >>On Mon, Jun 27, 2022 at 05:55:06PM +0200, Jiri Pirko wrote:
> >>> Mon, Jun 27, 2022 at 05:41:31PM CEST, idosch@nvidia.com wrote:
> >>> >On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
> >>> >> From: Jiri Pirko <jiri@nvidia.com>
> >>> >> 
> >>> >> This is an attempt to remove use of devlink_mutex. This is a global lock
> >>> >> taken for every user command. This causes long operations performed
> >>> >> on one devlink instance (like flash update) to block other
> >>> >> operations on different instances.
> >>> >
> >>> >This patchset is supposed to prevent one devlink instance from blocking
> >>> >another? Devlink does not enable "parallel_ops", which means that the
> >>> >generic netlink mutex is serializing all user space operations. AFAICT,
> >>> >this series does not enable "parallel_ops", so I'm not sure what
> >>> >difference the removal of the devlink mutex makes.
> >>> 
> >>> You are correct, that is missing. For me, as a side effect this patchset
> >>> resolved the deadlock for LC auxdev you pointed out. That was my
> >>> motivation for this patchset :)
> >>
> >>Given that devlink does not enable "parallel_ops" and that the generic
> >>netlink mutex is held throughout all callbacks, what prevents you from
> >>simply dropping the devlink mutex now? IOW, why can't this series be
> >>patch #1 and another patch that removes the devlink mutex?
> >
> >Yep, I think you are correct. We are currently working with Moshe on
> 
> Okay, I see the problem with what you suggested:
> devlink_pernet_pre_exit()
> There, devlink_mutex is taken to prevent simultaneous cmds
> from being executed. That will be fixed by the reload conversion to take
> devlink->lock.

OK, so this lock does not actually protect against simultaneous user
space operations (this is handled by the generic netlink mutex).
Instead, it protects against user space operations during netns
dismantle.

IIUC, the current plan is:

1. Get the devlink->lock rework done. Devlink will hold the lock for
every operation invocation and drivers will hold it while calling into
devlink via devl_lock().

This means 'DEVLINK_NL_FLAG_NO_LOCK' is removed and the lock will also
be taken in netns dismantle.

2. At this stage, the devlink mutex is only taken in devlink_register()
/ devlink_unregister() and some form of patch #1 will take care of that
so that this mutex can be removed.

3. Enable "parallel_ops"

?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch net-next RFC 0/2] net: devlink: remove devlink big lock
  2022-06-29 11:30           ` Ido Schimmel
@ 2022-06-29 11:47             ` Jiri Pirko
  0 siblings, 0 replies; 13+ messages in thread
From: Jiri Pirko @ 2022-06-29 11:47 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, davem, kuba, petrm, pabeni, edumazet, mlxsw, saeedm

Wed, Jun 29, 2022 at 01:30:07PM CEST, idosch@nvidia.com wrote:
>On Wed, Jun 29, 2022 at 12:36:24PM +0200, Jiri Pirko wrote:
>> Wed, Jun 29, 2022 at 12:25:49PM CEST, jiri@resnulli.us wrote:
>> >Tue, Jun 28, 2022 at 09:43:26AM CEST, idosch@nvidia.com wrote:
>> >>On Mon, Jun 27, 2022 at 05:55:06PM +0200, Jiri Pirko wrote:
>> >>> Mon, Jun 27, 2022 at 05:41:31PM CEST, idosch@nvidia.com wrote:
>> >>> >On Mon, Jun 27, 2022 at 03:54:59PM +0200, Jiri Pirko wrote:
>> >>> >> From: Jiri Pirko <jiri@nvidia.com>
>> >>> >> 
>> >>> >> This is an attempt to remove use of devlink_mutex. This is a global lock
>> >>> >> taken for every user command. This causes long operations performed
>> >>> >> on one devlink instance (like flash update) to block other
>> >>> >> operations on different instances.
>> >>> >
>> >>> >This patchset is supposed to prevent one devlink instance from blocking
>> >>> >another? Devlink does not enable "parallel_ops", which means that the
>> >>> >generic netlink mutex is serializing all user space operations. AFAICT,
>> >>> >this series does not enable "parallel_ops", so I'm not sure what
>> >>> >difference the removal of the devlink mutex makes.
>> >>> 
>> >>> You are correct, that is missing. For me, as a side effect this patchset
>> >>> resolved the deadlock for LC auxdev you pointed out. That was my
>> >>> motivation for this patchset :)
>> >>
>> >>Given that devlink does not enable "parallel_ops" and that the generic
>> >>netlink mutex is held throughout all callbacks, what prevents you from
>> >>simply dropping the devlink mutex now? IOW, why can't this series be
>> >>patch #1 and another patch that removes the devlink mutex?
>> >
>> >Yep, I think you are correct. We are currently working with Moshe on
>> 
>> Okay, I see the problem with what you suggested:
>> devlink_pernet_pre_exit()
>> There, devlink_mutex is taken to protect against simultaneous cmds
>> from being executed. That will be fixed with reload conversion to take
>> devlink->lock.
>
>OK, so this lock does not actually protect against simultaneous user
>space operations (this is handled by the generic netlink mutex).
>Instead, it protects against user space operations during netns
>dismantle.
>
>IIUC, the current plan is:
>
>1. Get the devlink->lock rework done. Devlink will hold the lock for
>every operation invocation and drivers will hold it while calling into
>devlink via devl_lock().
>
>This means 'DEVLINK_NL_FLAG_NO_LOCK' is removed and the lock will also
>be taken in netns dismantle.
>
>2. At this stage, the devlink mutex is only taken in devlink_register()
>/ devlink_unregister() and some form of patch #1 will take care of that
>so that this mutex can be removed.
>
>3. Enable "parallel_ops"

Yes, exactly. With devlink_mutex removal in between 2 and 3.

>
>?

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-06-29 11:47 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-27 13:54 [patch net-next RFC 0/2] net: devlink: remove devlink big lock Jiri Pirko
2022-06-27 13:55 ` [patch net-next RFC 1/2] net: devlink: make sure that devlink_try_get() works with valid pointer during xarray iteration Jiri Pirko
2022-06-27 13:55 ` [patch net-next RFC 2/2] net: devlink: replace devlink_mutex by per-devlink lock Jiri Pirko
2022-06-27 15:41 ` [patch net-next RFC 0/2] net: devlink: remove devlink big lock Ido Schimmel
2022-06-27 15:55   ` Jiri Pirko
2022-06-28  7:43     ` Ido Schimmel
2022-06-29 10:25       ` Jiri Pirko
2022-06-29 10:36         ` Jiri Pirko
2022-06-29 11:30           ` Ido Schimmel
2022-06-29 11:47             ` Jiri Pirko
2022-06-27 17:49   ` Jakub Kicinski
2022-06-28  6:32     ` Jiri Pirko
2022-06-28  7:04       ` Jiri Pirko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.