netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeedm@mellanox.com>
To: Saeed Mahameed <saeedm@mellanox.com>,
	Leon Romanovsky <leonro@mellanox.com>
Cc: "netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>,
	Jianbo Liu <jianbol@mellanox.com>,
	Eli Britstein <elibr@mellanox.com>, Roi Dayan <roid@mellanox.com>,
	Mark Bloch <markb@mellanox.com>
Subject: [PATCH V2 mlx5-next 06/13] net/mlx5: E-Switch, Tag packet with vport number in VF vports and uplink ingress ACLs
Date: Tue, 25 Jun 2019 17:48:00 +0000	[thread overview]
Message-ID: <20190625174727.20309-7-saeedm@mellanox.com> (raw)
In-Reply-To: <20190625174727.20309-1-saeedm@mellanox.com>

From: Jianbo Liu <jianbol@mellanox.com>

When a dual-port VHCA sends a RoCE packet on its non-native port, and the
packet arrives to its affiliated vport FDB, a mismatch might occur on the
rules that match the packet source vport as it is not represented by single
VHCA only in this case. So we change to match on metadata instead of source
vport.
To do that, a rule is created in all vports and uplink ingress ACLs, to
save the source vport number and vhca id in the packet's metadata in order
to match on it later.
The metadata register used is the first of the 32-bit type C registers. It
can be used for matching and header modify operations. The higher 16 bits
of this register are for vhca id, and the lower 16 ones is for vport
number.
This change is not for dual-port RoCE only. If HW and FW allow, the vport
metadata matching is enabled by default.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/eswitch.c |   2 +
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |   9 +
 .../mellanox/mlx5/core/eswitch_offloads.c     | 180 ++++++++++++++----
 include/linux/mlx5/eswitch.h                  |  17 ++
 4 files changed, 172 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index a42a23e505df..1235fd84ae3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1168,6 +1168,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
 
 	vport->ingress.drop_rule = NULL;
 	vport->ingress.allow_rule = NULL;
+
+	esw_vport_del_ingress_acl_modify_metadata(esw, vport);
 }
 
 void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 99dc25630629..51e71b824abf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -68,6 +68,8 @@ struct vport_ingress {
 	struct mlx5_flow_group *allow_spoofchk_only_grp;
 	struct mlx5_flow_group *allow_untagged_only_grp;
 	struct mlx5_flow_group *drop_grp;
+	int modify_metadata_id;
+	struct mlx5_flow_handle  *modify_metadata_rule;
 	struct mlx5_flow_handle  *allow_rule;
 	struct mlx5_flow_handle  *drop_rule;
 	struct mlx5_fc           *drop_counter;
@@ -196,6 +198,10 @@ struct mlx5_esw_functions {
 	u16			num_vfs;
 };
 
+enum {
+	MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+};
+
 struct mlx5_eswitch {
 	struct mlx5_core_dev    *dev;
 	struct mlx5_nb          nb;
@@ -203,6 +209,7 @@ struct mlx5_eswitch {
 	struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
 	struct workqueue_struct *work_queue;
 	struct mlx5_vport       *vports;
+	u32 flags;
 	int                     total_vports;
 	int                     enabled_vports;
 	/* Synchronize between vport change events
@@ -240,6 +247,8 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
 				  struct mlx5_vport *vport);
 void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
 				   struct mlx5_vport *vport);
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+					       struct mlx5_vport *vport);
 
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index c1c42c1370b8..4bcbc872cd08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1555,32 +1555,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
 static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
 					     struct mlx5_vport *vport)
 {
-	struct mlx5_core_dev *dev = esw->dev;
 	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
 	/* For prio tag mode, there is only 1 FTEs:
-	 * 1) Untagged packets - push prio tag VLAN, allow
+	 * 1) Untagged packets - push prio tag VLAN and modify metadata if
+	 * required, allow
 	 * Unmatched traffic is allowed by default
 	 */
 
-	if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
-		return -EOPNOTSUPP;
-
-	esw_vport_cleanup_ingress_rules(esw, vport);
-
-	err = esw_vport_enable_ingress_acl(esw, vport);
-	if (err) {
-		mlx5_core_warn(esw->dev,
-			       "failed to enable prio tag ingress acl (%d) on vport[%d]\n",
-			       err, vport->vport);
-		return err;
-	}
-
-	esw_debug(esw->dev,
-		  "vport[%d] configure ingress rules\n", vport->vport);
-
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
 		err = -ENOMEM;
@@ -1596,6 +1580,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
 	flow_act.vlan[0].ethtype = ETH_P_8021Q;
 	flow_act.vlan[0].vid = 0;
 	flow_act.vlan[0].prio = 0;
+
+	if (vport->ingress.modify_metadata_rule) {
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+		flow_act.modify_id = vport->ingress.modify_metadata_id;
+	}
+
 	vport->ingress.allow_rule =
 		mlx5_add_flow_rules(vport->ingress.acl, spec,
 				    &flow_act, NULL, 0);
@@ -1616,6 +1606,58 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
 	return err;
 }
 
+static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+						     struct mlx5_vport *vport)
+{
+	u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_spec spec = {};
+	int err = 0;
+
+	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+	MLX5_SET(set_action_in, action, data,
+		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
+
+	err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+				       1, action, &vport->ingress.modify_metadata_id);
+	if (err) {
+		esw_warn(esw->dev,
+			 "failed to alloc modify header for vport %d ingress acl (%d)\n",
+			 vport->vport, err);
+		return err;
+	}
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	flow_act.modify_id = vport->ingress.modify_metadata_id;
+	vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
+								  &spec, &flow_act, NULL, 0);
+	if (IS_ERR(vport->ingress.modify_metadata_rule)) {
+		err = PTR_ERR(vport->ingress.modify_metadata_rule);
+		esw_warn(esw->dev,
+			 "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+			 vport->vport, err);
+		vport->ingress.modify_metadata_rule = NULL;
+		goto out;
+	}
+
+out:
+	if (err)
+		mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+	return err;
+}
+
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+					       struct mlx5_vport *vport)
+{
+	if (vport->ingress.modify_metadata_rule) {
+		mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
+		mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+
+		vport->ingress.modify_metadata_rule = NULL;
+	}
+}
+
 static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
 					    struct mlx5_vport *vport)
 {
@@ -1623,6 +1665,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
+	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+		return 0;
+
 	/* For prio tag mode, there is only 1 FTEs:
 	 * 1) prio tag packets - pop the prio tag VLAN, allow
 	 * Unmatched traffic is allowed by default
@@ -1676,27 +1721,75 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
 	return err;
 }
 
-static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
+static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
+					   struct mlx5_vport *vport)
 {
-	struct mlx5_vport *vport = NULL;
-	int i, j;
 	int err;
 
-	mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+	    !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+		return 0;
+
+	esw_vport_cleanup_ingress_rules(esw, vport);
+
+	err = esw_vport_enable_ingress_acl(esw, vport);
+	if (err) {
+		esw_warn(esw->dev,
+			 "failed to enable ingress acl (%d) on vport[%d]\n",
+			 err, vport->vport);
+		return err;
+	}
+
+	esw_debug(esw->dev,
+		  "vport[%d] configure ingress rules\n", vport->vport);
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
+		if (err)
+			goto out;
+	}
+
+	if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+	    mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
 		err = esw_vport_ingress_prio_tag_config(esw, vport);
 		if (err)
-			goto err_ingress;
-		err = esw_vport_egress_prio_tag_config(esw, vport);
+			goto out;
+	}
+
+out:
+	if (err)
+		esw_vport_disable_ingress_acl(esw, vport);
+	return err;
+}
+
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *vport;
+	int i, j;
+	int err;
+
+	mlx5_esw_for_all_vports(esw, i, vport) {
+		err = esw_vport_ingress_common_config(esw, vport);
 		if (err)
-			goto err_egress;
+			goto err_ingress;
+
+		if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
+			err = esw_vport_egress_prio_tag_config(esw, vport);
+			if (err)
+				goto err_egress;
+		}
 	}
 
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+		esw_info(esw->dev, "Use metadata reg_c as source vport to match\n");
+
 	return 0;
 
 err_egress:
 	esw_vport_disable_ingress_acl(esw, vport);
 err_ingress:
-	mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
+	for (j = MLX5_VPORT_PF; j < i; j++) {
+		vport = &esw->vports[j];
 		esw_vport_disable_egress_acl(esw, vport);
 		esw_vport_disable_ingress_acl(esw, vport);
 	}
@@ -1704,15 +1797,17 @@ static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
 	return err;
 }
 
-static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
 {
 	struct mlx5_vport *vport;
 	int i;
 
-	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->nvports) {
+	mlx5_esw_for_all_vports(esw, i, vport) {
 		esw_vport_disable_egress_acl(esw, vport);
 		esw_vport_disable_ingress_acl(esw, vport);
 	}
+
+	esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
 }
 
 static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
@@ -1722,15 +1817,13 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
 	memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
 	mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
 
-	if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
-		err = esw_prio_tag_acls_config(esw, nvports);
-		if (err)
-			return err;
-	}
+	err = esw_create_offloads_acl_tables(esw);
+	if (err)
+		return err;
 
 	err = esw_create_offloads_fdb_tables(esw, nvports);
 	if (err)
-		return err;
+		goto create_fdb_err;
 
 	err = esw_create_offloads_table(esw, nvports);
 	if (err)
@@ -1748,6 +1841,9 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
 create_ft_err:
 	esw_destroy_offloads_fdb_tables(esw);
 
+create_fdb_err:
+	esw_destroy_offloads_acl_tables(esw);
+
 	return err;
 }
 
@@ -1756,8 +1852,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
 	esw_destroy_vport_rx_group(esw);
 	esw_destroy_offloads_table(esw);
 	esw_destroy_offloads_fdb_tables(esw);
-	if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
-		esw_prio_tag_acls_cleanup(esw);
+	esw_destroy_offloads_acl_tables(esw);
 }
 
 static void esw_functions_changed_event_handler(struct work_struct *work)
@@ -2296,3 +2391,16 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
 	return vport_num >= MLX5_VPORT_FIRST_VF &&
 	       vport_num <= esw->dev->priv.sriov.max_vfs;
 }
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+	return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
+
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+					      u16 vport_num)
+{
+	return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 174eec0871d9..aece3ae1902d 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -67,11 +67,28 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
 #ifdef CONFIG_MLX5_ESWITCH
 enum devlink_eswitch_encap_mode
 mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+					      u16 vport_num);
 #else  /* CONFIG_MLX5_ESWITCH */
 static inline enum devlink_eswitch_encap_mode
 mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
 {
 	return DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
+
+static inline bool
+mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+	return false;
+};
+
+static inline u32
+mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+					  int vport_num)
+{
+	return 0;
+};
 #endif /* CONFIG_MLX5_ESWITCH */
 #endif
-- 
2.21.0


  parent reply	other threads:[~2019-06-25 17:48 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-25 17:47 [PATCH V2 mlx5-next 00/13] Mellanox, mlx5 vport metadata matching Saeed Mahameed
2019-06-25 17:47 ` [PATCH V2 mlx5-next 01/13] net/mlx5: Introduce vport metadata matching bits and enum constants Saeed Mahameed
2019-06-25 17:47 ` [PATCH V2 mlx5-next 02/13] net/mlx5: Get vport ACL namespace by vport index Saeed Mahameed
2019-06-25 17:47 ` [PATCH V2 mlx5-next 03/13] net/mlx5: Support allocating modify header context from ingress ACL Saeed Mahameed
2019-06-25 17:47 ` [PATCH V2 mlx5-next 04/13] net/mlx5: Introduce a helper API to check VF vport Saeed Mahameed
2019-06-25 17:47 ` [PATCH V2 mlx5-next 05/13] net/mlx5: Add flow context for flow tag Saeed Mahameed
2019-06-25 17:48 ` Saeed Mahameed [this message]
2019-06-25 17:48 ` [PATCH V2 mlx5-next 07/13] net/mlx5e: Specifying known origin of packets matching the flow Saeed Mahameed
2019-06-25 17:48 ` [PATCH V2 mlx5-next 08/13] net/mlx5: E-Switch, Add match on vport metadata for rule in fast path Saeed Mahameed
2019-06-25 17:48 ` [PATCH V2 mlx5-next 09/13] net/mlx5: E-Switch, Add query and modify esw vport context functions Saeed Mahameed
2019-06-25 17:48 ` [PATCH V2 mlx5-next 10/13] net/mlx5: E-Switch, Pass metadata from FDB to eswitch manager Saeed Mahameed
2019-06-25 17:48 ` [PATCH V2 mlx5-next 11/13] net/mlx5: E-Switch, Add match on vport metadata for rule in slow path Saeed Mahameed
2019-06-25 17:48 ` [PATCH V2 mlx5-next 12/13] RDMA/mlx5: Add vport metadata matching for IB representors Saeed Mahameed
2019-06-25 17:48 ` [PATCH V2 mlx5-next 13/13] net/mlx5: E-Switch, Enable vport metadata matching if firmware supports it Saeed Mahameed
2019-06-26 19:05 ` [PATCH V2 mlx5-next 00/13] Mellanox, mlx5 vport metadata matching Saeed Mahameed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190625174727.20309-7-saeedm@mellanox.com \
    --to=saeedm@mellanox.com \
    --cc=elibr@mellanox.com \
    --cc=jianbol@mellanox.com \
    --cc=leonro@mellanox.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=markb@mellanox.com \
    --cc=netdev@vger.kernel.org \
    --cc=roid@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).