netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey <paulb@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	Oz Shlomo <ozsh@mellanox.com>,
	Jakub Kicinski <jakub.kicinski@netronome.com>,
	Vlad Buslov <vladbu@mellanox.com>,
	David Miller <davem@davemloft.net>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	Jiri Pirko <jiri@resnulli.us>, Roi Dayan <roid@mellanox.com>
Subject: [PATCH net-next-mlx5 v2 13/13] net/mlx5e: Restore tunnel metadata on miss
Date: Wed, 22 Jan 2020 15:52:58 +0200	[thread overview]
Message-ID: <1579701178-24624-14-git-send-email-paulb@mellanox.com> (raw)
In-Reply-To: <1579701178-24624-1-git-send-email-paulb@mellanox.com>

In a tunnel and chains setup, we decapsulate the packets on the first chain
hop. If we miss on later chains, the packet will come up without the tunnel
header, so it won't be taken automatically by the tunnel device (which
fills the tunnel metadata), and further tc tunnel matches won't work.

On miss, we get the tunnel mapping id, which was set on the chain 0 rule
that decapsulated the packet. This rule matched the tunnel outer
headers. From the tunnel mapping id, we get to this tunnel's matches
and restore the equivalent tunnel info metadata dst on the skb.
We also set the skb->dev to the relevant device (tunnel device).
Now further tc processing can be done on the relevant device.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
---

Changelog:
	V1->V2:
	   Guarded unused mlx5e_restore_tunnel if CONFIG_NET_TC_SKB_EXT isn't enabled

 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |  10 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 110 ++++++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h |   9 +-
 3 files changed, 117 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 4402a53..59d01a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1191,6 +1191,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_rep_priv *rpriv  = priv->ppriv;
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
+	struct mlx5e_tc_update_priv tc_priv = {};
 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 	struct mlx5e_wqe_frag_info *wi;
 	struct sk_buff *skb;
@@ -1223,11 +1224,13 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	if (rep->vlan && skb_vlan_tag_present(skb))
 		skb_vlan_pop(skb);
 
-	if (!mlx5e_tc_rep_update_skb(cqe, skb))
+	if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
 		goto free_wqe;
 
 	napi_gro_receive(rq->cq.napi, skb);
 
+	mlx5_tc_rep_post_napi_receive(&tc_priv);
+
 free_wqe:
 	mlx5e_free_rx_wqe(rq, wi, true);
 wq_cyc_pop:
@@ -1244,6 +1247,7 @@ void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
 	u32 wqe_offset     = stride_ix << rq->mpwqe.log_stride_sz;
 	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
 	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
+	struct mlx5e_tc_update_priv tc_priv = {};
 	struct mlx5e_rx_wqe_ll *wqe;
 	struct mlx5_wq_ll *wq;
 	struct sk_buff *skb;
@@ -1276,11 +1280,13 @@ void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 
-	if (!mlx5e_tc_rep_update_skb(cqe, skb))
+	if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
 		goto mpwrq_cqe_out;
 
 	napi_gro_receive(rq->cq.napi, skb);
 
+	mlx5_tc_rep_post_napi_receive(&tc_priv);
+
 mpwrq_cqe_out:
 	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 841147c..f130afc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4587,19 +4587,102 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
 	mutex_unlock(&rpriv->unready_flows_lock);
 }
 
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
+				 struct mlx5e_tc_update_priv *tc_priv,
+				 u32 tunnel_id)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct flow_dissector_key_enc_opts enc_opts = {};
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct metadata_dst *tun_dst;
+	struct tunnel_match_key key;
+	u32 tun_id, enc_opts_id;
+	struct net_device *dev;
+	int err;
+
+	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
+	tun_id = tunnel_id >> ENC_OPTS_BITS;
+
+	if (!tun_id)
+		return true;
+
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &uplink_rpriv->uplink_priv;
+
+	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
+	if (err) {
+		WARN_ON_ONCE(true);
+		netdev_dbg(priv->netdev,
+			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
+			   tun_id, err);
+		return false;
+	}
+
+	if (enc_opts_id) {
+		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
+				   enc_opts_id, &enc_opts);
+		if (err) {
+			netdev_dbg(priv->netdev,
+				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
+				   enc_opts_id, err);
+			return false;
+		}
+	}
+
+	tun_dst = tun_rx_dst(enc_opts.len);
+	if (!tun_dst) {
+		WARN_ON_ONCE(true);
+		return false;
+	}
+
+	ip_tunnel_key_init(&tun_dst->u.tun_info.key,
+			   key.enc_ipv4.src, key.enc_ipv4.dst,
+			   key.enc_ip.tos, key.enc_ip.ttl,
+			   0, /* label */
+			   key.enc_tp.src, key.enc_tp.dst,
+			   key32_to_tunnel_id(key.enc_key_id.keyid),
+			   TUNNEL_KEY);
+
+	if (enc_opts.len)
+		ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data,
+					enc_opts.len, enc_opts.dst_opt_type);
+
+	skb_dst_set(skb, (struct dst_entry *)tun_dst);
+	dev = dev_get_by_index(&init_net, key.filter_ifindex);
+	if (!dev) {
+		netdev_dbg(priv->netdev,
+			   "Couldn't find tunnel device with ifindex: %d\n",
+			   key.filter_ifindex);
+		return false;
+	}
+
+	/* Set tun_dev so we do dev_put() after datapath */
+	tc_priv->tun_dev = dev;
+
+	skb->dev = dev;
+
+	return true;
+}
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
 bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
-			     struct sk_buff *skb)
+			     struct sk_buff *skb,
+			     struct mlx5e_tc_update_priv *tc_priv)
 {
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	u32 chain = 0, reg_c0, reg_c1, tunnel_id;
 	struct tc_skb_ext *tc_skb_ext;
 	struct mlx5_eswitch *esw;
 	struct mlx5e_priv *priv;
-	u32 chain = 0, reg_c0;
+	int tunnel_moffset;
 	int err;
 
 	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
 	if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
 		reg_c0 = 0;
+	reg_c1 = be32_to_cpu(cqe->imm_inval_pkey);
 
 	if (!reg_c0)
 		return true;
@@ -4615,17 +4698,26 @@ bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
 		return false;
 	}
 
-	if (!chain)
-		return true;
+	if (chain) {
+		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+		if (!tc_skb_ext) {
+			WARN_ON(1);
+			return false;
+		}
 
-	tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
-	if (!tc_skb_ext) {
-		WARN_ON_ONCE(1);
-		return false;
+		tc_skb_ext->chain = chain;
 	}
 
-	tc_skb_ext->chain = chain;
+	tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
+	tunnel_id = reg_c1 >> (8 * tunnel_moffset);
+	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
 #endif /* CONFIG_NET_TC_SKB_EXT */
 
 	return true;
 }
+
+void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
+{
+	if (tc_priv->tun_dev)
+		dev_put(tc_priv->tun_dev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 2fab76b..21cbde4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -109,7 +109,14 @@ struct mlx5e_tc_attr_to_reg_mapping {
 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
 				    struct net_device *out_dev);
 
-bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb);
+struct mlx5e_tc_update_priv {
+	struct net_device *tun_dev;
+};
+
+bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
+			     struct mlx5e_tc_update_priv *tc_priv);
+
+void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
 
 struct mlx5e_tc_mod_hdr_acts {
 	int num_actions;
-- 
1.8.3.1


      parent reply	other threads:[~2020-01-22 13:53 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-22 13:52 [PATCH net-next v2 00/13] Handle multi chain hardware misses Paul Blakey
2020-01-22 13:52 ` [PATCH net-next v2 01/13] net: sched: support skb chain ext in tc classification path Paul Blakey
2020-01-22 15:29   ` Jakub Kicinski
2020-01-23  7:06     ` Jiri Pirko
2020-01-24  8:46     ` Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 02/13] net/mlx5: Add new driver lib for mappings unique ids to data Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 03/13] net/mlx5: E-Switch, Move source port on reg_c0 to the upper 16 bits Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 04/13] net/mlx5: E-Switch, Get reg_c0 value on CQE Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 05/13] net/mlx5: E-Switch, Mark miss packets with new chain id mapping Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 06/13] net/mlx5e: Rx, Split rep rx mpwqe handler from nic Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 07/13] net/mlx5: E-Switch, Restore chain id on miss Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 08/13] net/mlx5e: Allow re-allocating mod header actions Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 09/13] net/mlx5e: Move tc tunnel parsing logic with the rest at tc_tun module Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 10/13] net/mlx5e: Disallow inserting vxlan/vlan egress rules without decap/pop Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 11/13] net/mlx5e: Support inner header rewrite with goto action Paul Blakey
2020-01-22 13:52 ` [PATCH net-next-mlx5 v2 12/13] net/mlx5: E-Switch, Get reg_c1 value on miss Paul Blakey
2020-01-22 13:52 ` Paul Blakey [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1579701178-24624-14-git-send-email-paulb@mellanox.com \
    --to=paulb@mellanox.com \
    --cc=davem@davemloft.net \
    --cc=jakub.kicinski@netronome.com \
    --cc=jiri@resnulli.us \
    --cc=netdev@vger.kernel.org \
    --cc=ozsh@mellanox.com \
    --cc=roid@mellanox.com \
    --cc=saeedm@mellanox.com \
    --cc=vladbu@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).