linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: Saeed Mahameed <saeedm@nvidia.com>, Leon Romanovsky <leonro@nvidia.com>
Cc: netdev@vger.kernel.org, linux-rdma@vger.kernel.org,
	Mark Bloch <mbloch@nvidia.com>
Subject: [PATCH mlx5-next 14/14] net/mlx5: Lag, Create shared FDB when in switchdev mode
Date: Tue,  3 Aug 2021 16:19:59 -0700	[thread overview]
Message-ID: <20210803231959.26513-15-saeed@kernel.org> (raw)
In-Reply-To: <20210803231959.26513-1-saeed@kernel.org>

From: Mark Bloch <mbloch@nvidia.com>

If both eswitches are in switchdev mode and the uplink representors
are enslaved to the same bond device create a shared FDB configuration.

When moving to shared FDB mode not only the hardware needs be configured
but the RDMA driver needs to reconfigure itself.

When such change is done, unload the RDMA devices, configure the hardware
and load the RDMA representors.

When destroying the lag (can happen if a PCI function is unbinded,
driver is unloaded or by just removing a netdev from the bond) make sure
to restore the system to the previous state only if possible.

For example, if a PCI function is unbinded there is no need to load the
representors as the device is going away.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 118 +++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/lag.h |   3 +-
 .../net/ethernet/mellanox/mlx5/core/lag_mp.c  |   2 +-
 3 files changed, 105 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 89cd2b2af50a..f4dfa55c8c7e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -32,7 +32,9 @@
 
 #include <linux/netdevice.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
 #include "lag.h"
@@ -45,7 +47,7 @@
 static DEFINE_SPINLOCK(lag_lock);
 
 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
-			       u8 remap_port2)
+			       u8 remap_port2, bool shared_fdb)
 {
 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
 	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
 
 	return mlx5_cmd_exec_in(dev, create_lag, in);
 }
@@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 }
 
 static int mlx5_create_lag(struct mlx5_lag *ldev,
-			   struct lag_tracker *tracker)
+			   struct lag_tracker *tracker,
+			   bool shared_fdb)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
 	int err;
 
 	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
 				       &ldev->v2p_map[MLX5_LAG_P2]);
 
-	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
-		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
+		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+		       shared_fdb);
 
 	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
-				  ldev->v2p_map[MLX5_LAG_P2]);
-	if (err)
+				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
+	if (err) {
 		mlx5_core_err(dev0,
 			      "Failed to create LAG (%d)\n",
 			      err);
+		return err;
+	}
+
+	if (shared_fdb) {
+		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+							      dev1->priv.eswitch);
+		if (err)
+			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+		else
+			mlx5_core_info(dev0, "Operation mode is single FDB\n");
+	}
+
+	if (err) {
+		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+			mlx5_core_err(dev0,
+				      "Failed to deactivate RoCE LAG; driver restart required\n");
+	}
+
 	return err;
 }
 
 int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      struct lag_tracker *tracker,
-		      u8 flags)
+		      u8 flags,
+		      bool shared_fdb)
 {
 	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	int err;
 
-	err = mlx5_create_lag(ldev, tracker);
+	err = mlx5_create_lag(ldev, tracker, shared_fdb);
 	if (err) {
 		if (roce_lag) {
 			mlx5_core_err(dev0,
@@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 	}
 
 	ldev->flags |= flags;
+	ldev->shared_fdb = shared_fdb;
 	return 0;
 }
 
@@ -278,6 +306,12 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 
 	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
 
+	if (ldev->shared_fdb) {
+		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
+							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
+		ldev->shared_fdb = false;
+	}
+
 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
 	if (err) {
@@ -333,6 +367,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
 		if (!ldev->pf[i].dev)
 			continue;
 
+		if (ldev->pf[i].dev->priv.flags &
+		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+			continue;
+
 		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
 	}
@@ -342,12 +380,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+	bool shared_fdb = ldev->shared_fdb;
 	bool roce_lag;
 	int err;
 
 	roce_lag = __mlx5_lag_is_roce(ldev);
 
-	if (roce_lag) {
+	if (shared_fdb) {
+		mlx5_lag_remove_devices(ldev);
+	} else if (roce_lag) {
 		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
@@ -359,8 +400,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
 	if (err)
 		return;
 
-	if (roce_lag)
+	if (shared_fdb || roce_lag)
 		mlx5_lag_add_devices(ldev);
+
+	if (shared_fdb) {
+		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+	}
+}
+
+static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+	if (is_mdev_switchdev_mode(dev0) &&
+	    is_mdev_switchdev_mode(dev1) &&
+	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+	    mlx5_devcom_is_paired(dev0->priv.devcom,
+				  MLX5_DEVCOM_ESW_OFFLOADS) &&
+	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+		return true;
+
+	return false;
 }
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -380,6 +447,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 	}
 
 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
+		bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
 		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
 			   !mlx5_sriov_is_enabled(dev1);
 
@@ -389,23 +458,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
 #endif
 
-		if (roce_lag)
+		if (shared_fdb || roce_lag)
 			mlx5_lag_remove_devices(ldev);
 
 		err = mlx5_activate_lag(ldev, &tracker,
 					roce_lag ? MLX5_LAG_FLAG_ROCE :
-					MLX5_LAG_FLAG_SRIOV);
+						   MLX5_LAG_FLAG_SRIOV,
+					shared_fdb);
 		if (err) {
-			if (roce_lag)
+			if (shared_fdb || roce_lag)
 				mlx5_lag_add_devices(ldev);
 
 			return;
-		}
-
-		if (roce_lag) {
+		} else if (roce_lag) {
 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
 			mlx5_nic_vport_enable_roce(dev1);
+		} else if (shared_fdb) {
+			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+			mlx5_rescan_drivers_locked(dev0);
+
+			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+			if (!err)
+				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+			if (err) {
+				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+				mlx5_rescan_drivers_locked(dev0);
+				mlx5_deactivate_lag(ldev);
+				mlx5_lag_add_devices(ldev);
+				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+				mlx5_core_err(dev0, "Failed to enable lag\n");
+				return;
+			}
 		}
 	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
 		mlx5_modify_lag(ldev, &tracker);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index e1d7a6671cf3..d4bae528954e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -73,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 		     struct lag_tracker *tracker);
 int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      struct lag_tracker *tracker,
-		      u8 flags);
+		      u8 flags,
+		      bool shared_fdb);
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 				struct net_device *ndev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index c4bf8b679541..011b639b29bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
 		struct lag_tracker tracker;
 
 		tracker = ldev->tracker;
-		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
 	}
 
 	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
-- 
2.31.1


  parent reply	other threads:[~2021-08-03 23:20 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-03 23:19 [PATCH mlx5-next 00/14] mlx5 single FDB for lag Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 01/14] net/mlx5: Return mdev from eswitch Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 02/14] net/mlx5: Lag, add initial logic for shared FDB Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 03/14] RDMA/mlx5: Fill port info based on the relevant eswitch Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 04/14] {net, RDMA}/mlx5: Extend send to vport rules Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 05/14] RDMA/mlx5: Add shared FDB support Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 06/14] net/mlx5: E-Switch, set flow source for send to uplink rule Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 07/14] net/mlx5e: Add an option to create a shared mapping Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 08/14] net/mlx5e: Use shared mappings for restoring from metadata Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 09/14] net/mlx5: E-Switch, Add event callback for representors Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 10/14] net/mlx5: Add send to vport rules on paired device Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 11/14] net/mlx5: Lag, properly lock eswitch if needed Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 12/14] net/mlx5: Lag, move lag destruction to a workqueue Saeed Mahameed
2021-08-03 23:19 ` [PATCH mlx5-next 13/14] net/mlx5/ E-Switch, add logic to enable shared FDB Saeed Mahameed
2021-08-03 23:19 ` Saeed Mahameed [this message]
2021-08-05 21:02 ` [PATCH mlx5-next 00/14] mlx5 single FDB for lag Saeed Mahameed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210803231959.26513-15-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).