All of lore.kernel.org
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	netdev@vger.kernel.org, Moshe Shemesh <moshe@nvidia.com>
Subject: [net-next 07/15] net/mlx5: Use devl_ API in mlx5e_devlink_port_register
Date: Wed,  6 Jul 2022 16:24:13 -0700	[thread overview]
Message-ID: <20220706232421.41269-8-saeed@kernel.org> (raw)
In-Reply-To: <20220706232421.41269-1-saeed@kernel.org>

From: Moshe Shemesh <moshe@nvidia.com>

Flows invoked by mlx5_devlink_eswitch_mode_set() can reach
mlx5_rescan_drivers_locked(), which can call mlx5e_probe()/mlx5e_remove()
and register/unregister mlx5e driver ports accordingly. This can lead to
a deadlock once mlx5_devlink_eswitch_mode_set() takes the devlink lock.
Use devl_port_register/unregister() instead of
devlink_port_register/unregister() and take the devlink instance lock in
the driver paths leading to these functions, so that the lock is held
while the devl_ API functions are called.

If probe or remove is called from the module init or module cleanup
flows, the devlink lock must be taken just before calling
devl_port_register()/devl_port_unregister(). Otherwise they are called
from the attach/detach or register/unregister flows, where the whole flow
can hold the lock. Add a flag to distinguish between these cases.

This will be used by the downstream patch to invoke
mlx5_devlink_eswitch_mode_set() with devlink locked.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c | 29 +++++++++++++++++--
 .../ethernet/mellanox/mlx5/core/en/devlink.c  | 16 ++++++++--
 .../mellanox/mlx5/core/eswitch_offloads.c     |  2 ++
 include/linux/mlx5/driver.h                   |  4 +++
 4 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 50422b56a64d..ccf2068d2e79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -335,13 +335,16 @@ static void del_adev(struct auxiliary_device *adev)
 
 int mlx5_attach_device(struct mlx5_core_dev *dev)
 {
+	struct devlink *devlink = priv_to_devlink(dev);
 	struct mlx5_priv *priv = &dev->priv;
 	struct auxiliary_device *adev;
 	struct auxiliary_driver *adrv;
 	int ret = 0, i;
 
+	devl_lock(devlink);
 	mutex_lock(&mlx5_intf_mutex);
 	priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
+	priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
 	for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
 		if (!priv->adev[i]) {
 			bool is_supported = false;
@@ -389,19 +392,24 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
 			break;
 		}
 	}
+	priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
 	mutex_unlock(&mlx5_intf_mutex);
+	devl_unlock(devlink);
 	return ret;
 }
 
 void mlx5_detach_device(struct mlx5_core_dev *dev)
 {
+	struct devlink *devlink = priv_to_devlink(dev);
 	struct mlx5_priv *priv = &dev->priv;
 	struct auxiliary_device *adev;
 	struct auxiliary_driver *adrv;
 	pm_message_t pm = {};
 	int i;
 
+	devl_lock(devlink);
 	mutex_lock(&mlx5_intf_mutex);
+	priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
 	for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) {
 		if (!priv->adev[i])
 			continue;
@@ -430,18 +438,24 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
 		del_adev(&priv->adev[i]->adev);
 		priv->adev[i] = NULL;
 	}
+	priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
 	priv->flags |= MLX5_PRIV_FLAGS_DETACH;
 	mutex_unlock(&mlx5_intf_mutex);
+	devl_unlock(devlink);
 }
 
 int mlx5_register_device(struct mlx5_core_dev *dev)
 {
+	struct devlink *devlink;
 	int ret;
 
+	devlink = priv_to_devlink(dev);
+	devl_lock(devlink);
 	mutex_lock(&mlx5_intf_mutex);
 	dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
 	ret = mlx5_rescan_drivers_locked(dev);
 	mutex_unlock(&mlx5_intf_mutex);
+	devl_unlock(devlink);
 	if (ret)
 		mlx5_unregister_device(dev);
 
@@ -450,10 +464,15 @@ int mlx5_register_device(struct mlx5_core_dev *dev)
 
 void mlx5_unregister_device(struct mlx5_core_dev *dev)
 {
+	struct devlink *devlink;
+
+	devlink = priv_to_devlink(dev);
+	devl_lock(devlink);
 	mutex_lock(&mlx5_intf_mutex);
 	dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
 	mlx5_rescan_drivers_locked(dev);
 	mutex_unlock(&mlx5_intf_mutex);
+	devl_unlock(devlink);
 }
 
 static int add_drivers(struct mlx5_core_dev *dev)
@@ -526,16 +545,22 @@ static void delete_drivers(struct mlx5_core_dev *dev)
 int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
 {
 	struct mlx5_priv *priv = &dev->priv;
+	int err = 0;
 
 	lockdep_assert_held(&mlx5_intf_mutex);
 	if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
 		return 0;
 
+	priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
 	delete_drivers(dev);
 	if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
-		return 0;
+		goto out;
+
+	err = add_drivers(dev);
 
-	return add_drivers(dev);
+out:
+	priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+	return err;
 }
 
 bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index ae52e7f38306..b69f9d10ccbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -21,6 +21,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
 	struct netdev_phys_item_id ppid = {};
 	struct devlink_port *dl_port;
 	unsigned int dl_port_index;
+	int ret;
 
 	if (mlx5_core_is_pf(priv->mdev)) {
 		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
@@ -41,7 +42,13 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
 	memset(dl_port, 0, sizeof(*dl_port));
 	devlink_port_attrs_set(dl_port, &attrs);
 
-	return devlink_port_register(devlink, dl_port, dl_port_index);
+	if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+		devl_lock(devlink);
+	ret = devl_port_register(devlink, dl_port, dl_port_index);
+	if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+		devl_unlock(devlink);
+
+	return ret;
 }
 
 void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
@@ -54,8 +61,13 @@ void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
 void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
 {
 	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+	struct devlink *devlink = priv_to_devlink(priv->mdev);
 
-	devlink_port_unregister(dl_port);
+	if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+		devl_lock(devlink);
+	devl_port_unregister(dl_port);
+	if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+		devl_unlock(devlink);
 }
 
 struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 1bfbc88f513f..ccda3a0a2594 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3400,7 +3400,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 		err = esw_offloads_start(esw, extack);
 	} else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) {
 		err = esw_offloads_stop(esw, extack);
+		devl_lock(devlink);
 		mlx5_rescan_drivers(esw->dev);
+		devl_unlock(devlink);
 	} else {
 		err = -EINVAL;
 	}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 76d7661e3e63..bd882884b23c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -551,6 +551,10 @@ enum {
 	 * creation/deletion on drivers rescan. Unset during device attach.
 	 */
 	MLX5_PRIV_FLAGS_DETACH = 1 << 2,
+	/* Distinguish between mlx5e_probe/remove called by module init/cleanup
+	 * and called by other flows which can already hold devlink lock
+	 */
+	MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW = 1 << 3,
 };
 
 struct mlx5_adev {
-- 
2.36.1


  parent reply	other threads:[~2022-07-06 23:24 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-06 23:24 [pull request][net-next 00/15] mlx5 updates 2022-07-06 Saeed Mahameed
2022-07-06 23:24 ` [net-next 01/15] net/mlx5: Remove devl_unlock from mlx5_eswtich_mode_callback_enter Saeed Mahameed
2022-07-06 23:24 ` [net-next 02/15] net/mlx5: Use devl_ API for rate nodes destroy Saeed Mahameed
2022-07-06 23:24 ` [net-next 03/15] devlink: Remove unused function devlink_rate_nodes_destroy Saeed Mahameed
2022-07-06 23:24 ` [net-next 04/15] net/mlx5: Use devl_ API in mlx5_esw_offloads_devlink_port_register Saeed Mahameed
2022-07-06 23:24 ` [net-next 05/15] net/mlx5: Use devl_ API in mlx5_esw_devlink_sf_port_register Saeed Mahameed
2022-07-06 23:24 ` [net-next 06/15] devlink: Remove unused functions devlink_rate_leaf_create/destroy Saeed Mahameed
2022-07-06 23:24 ` Saeed Mahameed [this message]
2022-07-06 23:24 ` [net-next 08/15] net/mlx5: Remove devl_unlock from mlx5_devlink_eswitch_mode_set Saeed Mahameed
2022-07-06 23:24 ` [net-next 09/15] devlink: Hold the instance lock in port_new / port_del callbacks Saeed Mahameed
2022-07-06 23:24 ` [net-next 10/15] net/tls: Perform immediate device ctx cleanup when possible Saeed Mahameed
2022-07-07  2:21   ` Jakub Kicinski
2022-07-07  6:51     ` Saeed Mahameed
2022-07-07 16:14       ` Jakub Kicinski
2022-07-07 17:29         ` Saeed Mahameed
2022-07-06 23:24 ` [net-next 11/15] net/tls: Multi-threaded calls to TX tls_dev_del Saeed Mahameed
2022-07-07  2:37   ` Jakub Kicinski
2022-07-07 22:14     ` Tariq Toukan
2022-07-08  0:17       ` Jakub Kicinski
2022-07-08 13:10         ` Maxim Mikityanskiy
2022-07-08 18:10           ` Jakub Kicinski
2022-07-06 23:24 ` [net-next 12/15] net/mlx5e: kTLS, Introduce TLS-specific create TIS Saeed Mahameed
2022-07-06 23:24 ` [net-next 13/15] net/mlx5e: kTLS, Take stats out of OOO handler Saeed Mahameed
2022-07-06 23:24 ` [net-next 14/15] net/mlx5e: kTLS, Recycle objects of device-offloaded TLS TX connections Saeed Mahameed
2022-07-06 23:24 ` [net-next 15/15] net/mlx5e: kTLS, Dynamically re-size TX recycling pool Saeed Mahameed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220706232421.41269-8-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=moshe@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.