From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey <paulb@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	Oz Shlomo <ozsh@mellanox.com>,
	Jakub Kicinski <jakub.kicinski@netronome.com>,
	Vlad Buslov <vladbu@mellanox.com>,
	David Miller <davem@davemloft.net>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	Jiri Pirko <jiri@resnulli.us>
Subject: [PATCH net-next-mlx5 05/13] net/mlx5: E-Switch, Mark miss packets with new chain id mapping
Date: Tue, 21 Jan 2020 18:16:14 +0200	[thread overview]
Message-ID: <1579623382-6934-6-git-send-email-paulb@mellanox.com> (raw)
In-Reply-To: <1579623382-6934-1-git-send-email-paulb@mellanox.com>

Currently, if we miss in hardware after jumping to some chain,
we continue in chain 0 in software. This is wrong; with the new
tc skb extension we can now restore the chain id on the skb, so
tc can continue in the correct chain.

To restore the chain id in software after a miss in hardware, we create
a mapping from the 32bit chain ids to 16bit ids that fit in reg_c0
(which survives loopback). We then mark packets that miss on some
chain with that chain's mapped id in their reg_c0 field. Since the
mapped ids are 16 bits wide, this supports up to 64K concurrent
chains.
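
As a rough sketch (error handling trimmed), the per-chain setup below
boils down to mapping the chain and marking misses with the mapped id,
using the mapping lib added earlier in this series:

    u32 index;
    int err;

    /* Map the 32bit chain id to a 16bit id, bounded by
     * esw_get_max_restore_tag() so it fits in the reg_c0 bits
     * reserved for chain restore.
     */
    err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index);
    if (err)
            return err;
    fdb_chain->id = index;

    /* The chain's miss rule then carries a modify header action that
     * writes fdb_chain->id into reg_c0, so packets that miss in this
     * chain reach the slow path already marked.
     */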

This register survives loopback and is delivered to the CQE, on the
flow_tag field, via the eswitch restore rules.

In the next commit, we will reverse the mapping of the id we get on
the CQE back to a chain id, and tell tc via the tc skb extension to
continue in the software chain where we left off.
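
A sketch of that restore path, assuming the helper added below; the
rx-side names here (tag, skb) are illustrative and the exact code
lands in the following patches:

    struct tc_skb_ext *tc_skb_ext;
    u32 chain = 0;

    /* 'tag' is the reg_c0 value read back from the CQE flow_tag */
    if (mlx5_eswitch_get_chain_for_tag(esw, tag, &chain) || !chain)
            return; /* nothing to restore, tc starts from chain 0 */

    tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
    if (tc_skb_ext)
            tc_skb_ext->chain = chain;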

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |   8 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h    |  12 ++
 .../mellanox/mlx5/core/eswitch_offloads_chains.c   | 130 ++++++++++++++++++++-
 .../mellanox/mlx5/core/eswitch_offloads_chains.h   |   4 +-
 4 files changed, 150 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 26f559b..427432f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -153,6 +153,14 @@ struct mlx5e_tc_flow_parse_attr {
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
 
+struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
+	[CHAIN_TO_REG] = {
+		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+		.moffset = 0,
+		.mlen = 2,
+	},
+};
+
 struct mlx5e_hairpin {
 	struct mlx5_hairpin *pair;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 262cdb7..e2dbbae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -91,6 +91,18 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
 
 void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
 
+enum mlx5e_tc_attr_to_reg {
+	CHAIN_TO_REG,
+};
+
+struct mlx5e_tc_attr_to_reg_mapping {
+	int mfield; /* rewrite field */
+	int moffset; /* offset of mfield */
+	int mlen; /* bytes to rewrite/match */
+};
+
+extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[];
+
 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
 				    struct net_device *out_dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
index 3a60eb5..4f9b896 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
@@ -6,14 +6,17 @@
 #include <linux/mlx5/fs.h>
 
 #include "eswitch_offloads_chains.h"
+#include "lib/mapping.h"
 #include "mlx5_core.h"
 #include "fs_core.h"
 #include "eswitch.h"
 #include "en.h"
+#include "en_tc.h"
 
 #define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv)
 #define esw_chains_lock(esw) (esw_chains_priv(esw)->lock)
 #define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht)
+#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping)
 #define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht)
 #define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left)
 #define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb)
@@ -44,6 +47,7 @@ struct mlx5_esw_chains_priv {
 	struct mutex lock;
 
 	struct mlx5_flow_table *tc_end_fdb;
+	struct mapping_ctx *chains_mapping;
 
 	int fdb_left[ARRAY_SIZE(ESW_POOLS)];
 };
@@ -54,9 +58,12 @@ struct fdb_chain {
 	u32 chain;
 
 	int ref;
+	int id;
 
 	struct mlx5_eswitch *esw;
 	struct list_head prios_list;
+	struct mlx5_flow_handle *restore_rule;
+	struct mlx5_modify_hdr *miss_modify_hdr;
 };
 
 struct fdb_prio_key {
@@ -255,6 +262,70 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
 	mlx5_destroy_flow_table(fdb);
 }
 
+static int
+create_fdb_chain_restore(struct fdb_chain *fdb_chain)
+{
+	char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)];
+	struct mlx5_eswitch *esw = fdb_chain->esw;
+	struct mlx5_modify_hdr *mod_hdr;
+	u32 index;
+	int err;
+
+	if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw))
+		return 0;
+
+	err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index);
+	if (err)
+		return err;
+	if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
+		/* we got the special default flow tag id, so we won't know
+		 * if we actually marked the packet with the restore rule
+		 * we create.
+		 *
+		 * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
+		 */
+		err = mapping_add(esw_chains_mapping(esw),
+				  &fdb_chain->chain, &index);
+		mapping_remove(esw_chains_mapping(esw),
+			       MLX5_FS_DEFAULT_FLOW_TAG);
+		if (err)
+			return err;
+	}
+
+	fdb_chain->id = index;
+
+	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, modact, field,
+		 mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield);
+	MLX5_SET(set_action_in, modact, offset,
+		 mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8);
+	MLX5_SET(set_action_in, modact, length,
+		 mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8);
+	MLX5_SET(set_action_in, modact, data, fdb_chain->id);
+	mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
+					   1, modact);
+	if (IS_ERR(mod_hdr)) {
+		err = PTR_ERR(mod_hdr);
+		goto err_mod_hdr;
+	}
+	fdb_chain->miss_modify_hdr = mod_hdr;
+
+	fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id);
+	if (IS_ERR(fdb_chain->restore_rule)) {
+		err = PTR_ERR(fdb_chain->restore_rule);
+		goto err_rule;
+	}
+
+	return 0;
+
+err_rule:
+	mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr);
+err_mod_hdr:
+	/* Datapath can't find this mapping, so we can safely remove it */
+	mapping_remove(esw_chains_mapping(esw), fdb_chain->id);
+	return err;
+}
+
 static struct fdb_chain *
 mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
 {
@@ -269,6 +340,10 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
 	fdb_chain->chain = chain;
 	INIT_LIST_HEAD(&fdb_chain->prios_list);
 
+	err = create_fdb_chain_restore(fdb_chain);
+	if (err)
+		goto err_restore;
+
 	err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node,
 				     chain_params);
 	if (err)
@@ -277,6 +352,12 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
 	return fdb_chain;
 
 err_insert:
+	if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
+		mlx5_del_flow_rules(fdb_chain->restore_rule);
+		mlx5_modify_header_dealloc(esw->dev,
+					   fdb_chain->miss_modify_hdr);
+	}
+err_restore:
 	kvfree(fdb_chain);
 	return ERR_PTR(err);
 }
@@ -288,6 +369,15 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
 
 	rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node,
 			       chain_params);
+
+	if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
+		mlx5_del_flow_rules(fdb_chain->restore_rule);
+		mlx5_modify_header_dealloc(esw->dev,
+					   fdb_chain->miss_modify_hdr);
+
+		mapping_remove(esw_chains_mapping(esw), fdb_chain->id);
+	}
+
 	kvfree(fdb_chain);
 }
 
@@ -310,10 +400,12 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
 }
 
 static struct mlx5_flow_handle *
-mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb,
+mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain,
+			      struct mlx5_flow_table *fdb,
 			      struct mlx5_flow_table *next_fdb)
 {
 	static const struct mlx5_flow_spec spec = {};
+	struct mlx5_eswitch *esw = fdb_chain->esw;
 	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_act act = {};
 
@@ -322,6 +414,11 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
 	dest.type  = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	dest.ft = next_fdb;
 
+	if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
+		act.modify_hdr = fdb_chain->miss_modify_hdr;
+		act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+	}
+
 	return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1);
 }
 
@@ -345,7 +442,8 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
 	list_for_each_entry_continue_reverse(pos,
 					     &fdb_chain->prios_list,
 					     list) {
-		miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb,
+		miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain,
+							      pos->fdb,
 							      next_fdb);
 		if (IS_ERR(miss_rules[n])) {
 			err = PTR_ERR(miss_rules[n]);
@@ -459,7 +557,7 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
 	}
 
 	/* Add miss rule to next_fdb */
-	miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb);
+	miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb);
 	if (IS_ERR(miss_rule)) {
 		err = PTR_ERR(miss_rule);
 		goto err_miss_rule;
@@ -624,6 +722,7 @@ struct mlx5_flow_table *
 	struct mlx5_esw_chains_priv *chains_priv;
 	struct mlx5_core_dev *dev = esw->dev;
 	u32 max_flow_counter, fdb_max;
+	struct mapping_ctx *mapping;
 	int err;
 
 	chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
@@ -660,10 +759,20 @@ struct mlx5_flow_table *
 	if (err)
 		goto init_prios_ht_err;
 
+	mapping = mapping_idr_create(sizeof(u32),
+				     esw_get_max_restore_tag(esw), true);
+	if (IS_ERR(mapping)) {
+		err = PTR_ERR(mapping);
+		goto mapping_err;
+	}
+	esw_chains_mapping(esw) = mapping;
+
 	mutex_init(&esw_chains_lock(esw));
 
 	return 0;
 
+mapping_err:
+	rhashtable_destroy(&esw_prios_ht(esw));
 init_prios_ht_err:
 	rhashtable_destroy(&esw_chains_ht(esw));
 init_chains_ht_err:
@@ -675,6 +784,7 @@ struct mlx5_flow_table *
 mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw)
 {
 	mutex_destroy(&esw_chains_lock(esw));
+	mapping_idr_destroy(esw_chains_mapping(esw));
 	rhashtable_destroy(&esw_prios_ht(esw));
 	rhashtable_destroy(&esw_chains_ht(esw));
 
@@ -756,3 +866,17 @@ struct mlx5_flow_table *
 	mlx5_esw_chains_close(esw);
 	mlx5_esw_chains_cleanup(esw);
 }
+
+int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag,
+				   u32 *chain)
+{
+	int err;
+
+	err = mapping_find(esw_chains_mapping(esw), tag, chain);
+	if (err) {
+		esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag);
+		return -ENOENT;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h
index 2e13097..da45e49 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h
@@ -26,5 +26,7 @@ struct mlx5_flow_table *
 int mlx5_esw_chains_create(struct mlx5_eswitch *esw);
 void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
 
-#endif /* __ML5_ESW_CHAINS_H__ */
+int
+mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain);
 
+#endif /* __ML5_ESW_CHAINS_H__ */
-- 
1.8.3.1

