All of lore.kernel.org
 help / color / mirror / Atom feed
From: Simon Horman <simon.horman@corigine.com>
To: David Miller <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: netdev@vger.kernel.org, oss-drivers@corigine.com,
	Hui Zhou <hui.zhou@corigine.com>,
	Ziyang Chen <ziyang.chen@corigine.com>
Subject: [PATCH net-next 2/3] nfp: flower: support hw offload for ct nat action
Date: Wed, 14 Sep 2022 17:06:03 +0100	[thread overview]
Message-ID: <20220914160604.1740282-3-simon.horman@corigine.com> (raw)
In-Reply-To: <20220914160604.1740282-1-simon.horman@corigine.com>

From: Hui Zhou <hui.zhou@corigine.com>

support ct nat action when pre_ct merge with post_ct
and nft. at the same time, add the extra checksum action
and hardware stats for nft to meet the action check when
do nat.

Signed-off-by: Hui Zhou <hui.zhou@corigine.com>
Reviewed-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 193 +++++++++++++++++-
 .../ethernet/netronome/nfp/flower/conntrack.h |   6 +
 2 files changed, 192 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index b3b2a23b8d89..235f02327f42 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -1,6 +1,9 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (C) 2021 Corigine, Inc. */
 
+#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_ct.h>
+
 #include "conntrack.h"
 #include "../nfp_port.h"
 
@@ -56,9 +59,17 @@ bool is_pre_ct_flow(struct flow_cls_offload *flow)
 	int i;
 
 	flow_action_for_each(i, act, &flow->rule->action) {
-		if (act->id == FLOW_ACTION_CT && !act->ct.action)
-			return true;
+		if (act->id == FLOW_ACTION_CT) {
+			/* The pre_ct rule only have the ct or ct nat action, cannot
+			 * contains other ct action e.g ct commit and so on.
+			 */
+			if ((!act->ct.action || act->ct.action == TCA_CT_ACT_NAT))
+				return true;
+			else
+				return false;
+		}
 	}
+
 	return false;
 }
 
@@ -66,13 +77,37 @@ bool is_post_ct_flow(struct flow_cls_offload *flow)
 {
 	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
 	struct flow_dissector *dissector = rule->match.dissector;
+	struct flow_action_entry *act;
+	bool exist_ct_clear = false;
 	struct flow_match_ct ct;
+	int i;
+
+	/* post ct entry cannot contains any ct action except ct_clear. */
+	flow_action_for_each(i, act, &flow->rule->action) {
+		if (act->id == FLOW_ACTION_CT) {
+			/* ignore ct clear action. */
+			if (act->ct.action == TCA_CT_ACT_CLEAR) {
+				exist_ct_clear = true;
+				continue;
+			}
+
+			return false;
+		}
+	}
 
 	if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) {
 		flow_rule_match_ct(rule, &ct);
 		if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)
 			return true;
+	} else {
+		/* when do nat with ct, the post ct entry ignore the ct status,
+		 * will match the nat field(sip/dip) instead. In this situation,
+		 * the flow chain index is not zero and contains ct clear action.
+		 */
+		if (flow->common.chain_index && exist_ct_clear)
+			return true;
 	}
+
 	return false;
 }
 
@@ -168,6 +203,20 @@ static void *get_mangled_tos_ttl(struct flow_rule *rule, void *buf,
 	return buf;
 }
 
+/* Note entry1 and entry2 are not swappable. only skip ip and
+ * tport merge check for pre_ct and post_ct when pre_ct do nat.
+ */
+static bool nfp_ct_merge_check_cannot_skip(struct nfp_fl_ct_flow_entry *entry1,
+					   struct nfp_fl_ct_flow_entry *entry2)
+{
+	/* only pre_ct have NFP_FL_ACTION_DO_NAT flag. */
+	if ((entry1->flags & NFP_FL_ACTION_DO_NAT) &&
+	    entry2->type == CT_TYPE_POST_CT)
+		return false;
+
+	return true;
+}
+
 /* Note entry1 and entry2 are not swappable, entry1 should be
  * the former flow whose mangle action need be taken into account
  * if existed, and entry2 should be the latter flow whose action
@@ -225,7 +274,12 @@ static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1,
 			goto check_failed;
 	}
 
-	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+	/* if pre ct entry do nat, the nat ip exists in nft entry,
+	 * will be do merge check when do nft and post ct merge,
+	 * so skip this ip merge check here.
+	 */
+	if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) &&
+	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
 		struct flow_match_ipv4_addrs match1, match2;
 
 		flow_rule_match_ipv4_addrs(entry1->rule, &match1);
@@ -242,7 +296,12 @@ static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1,
 			goto check_failed;
 	}
 
-	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+	/* if pre ct entry do nat, the nat ip exists in nft entry,
+	 * will be do merge check when do nft and post ct merge,
+	 * so skip this ip merge check here.
+	 */
+	if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) &&
+	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
 		struct flow_match_ipv6_addrs match1, match2;
 
 		flow_rule_match_ipv6_addrs(entry1->rule, &match1);
@@ -259,7 +318,12 @@ static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1,
 			goto check_failed;
 	}
 
-	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_PORTS)) {
+	/* if pre ct entry do nat, the nat tport exists in nft entry,
+	 * will be do merge check when do nft and post ct merge,
+	 * so skip this tport merge check here.
+	 */
+	if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_PORTS)) &&
+	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
 		enum flow_action_mangle_base htype = FLOW_ACT_MANGLE_UNSPEC;
 		struct flow_match_ports match1, match2;
 
@@ -468,6 +532,12 @@ static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry,
 			return -EINVAL;
 
 		return 0;
+	} else {
+		/* post_ct with ct clear action will not match the
+		 * ct status when nft is nat entry.
+		 */
+		if (nft_entry->flags & NFP_FL_ACTION_DO_MANGLE)
+			return 0;
 	}
 
 	return -EINVAL;
@@ -537,11 +607,37 @@ nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map)
 	return key_size;
 }
 
+/* get the csum flag according the ip proto and mangle action. */
+static void nfp_fl_get_csum_flag(struct flow_action_entry *a_in, u8 ip_proto, u32 *csum)
+{
+	if (a_in->id != FLOW_ACTION_MANGLE)
+		return;
+
+	switch (a_in->mangle.htype) {
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+		*csum |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
+		if (ip_proto == IPPROTO_TCP)
+			*csum |= TCA_CSUM_UPDATE_FLAG_TCP;
+		else if (ip_proto == IPPROTO_UDP)
+			*csum |= TCA_CSUM_UPDATE_FLAG_UDP;
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+		*csum |= TCA_CSUM_UPDATE_FLAG_TCP;
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+		*csum |= TCA_CSUM_UPDATE_FLAG_UDP;
+		break;
+	default:
+		break;
+	}
+}
+
 static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
 					struct nfp_flower_priv *priv,
 					struct net_device *netdev,
 					struct nfp_fl_payload *flow_pay)
 {
+	enum flow_action_hw_stats tmp_stats = FLOW_ACTION_HW_STATS_DONT_CARE;
 	struct flow_action_entry *a_in;
 	int i, j, num_actions, id;
 	struct flow_rule *a_rule;
@@ -551,15 +647,25 @@ static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
 		      rules[CT_TYPE_NFT]->action.num_entries +
 		      rules[CT_TYPE_POST_CT]->action.num_entries;
 
-	a_rule = flow_rule_alloc(num_actions);
+	/* Add one action to make sure there is enough room to add an checksum action
+	 * when do nat.
+	 */
+	a_rule = flow_rule_alloc(num_actions + 1);
 	if (!a_rule)
 		return -ENOMEM;
 
 	/* Actions need a BASIC dissector. */
 	a_rule->match = rules[CT_TYPE_PRE_CT]->match;
+	/* post_ct entry have one action at least. */
+	if (rules[CT_TYPE_POST_CT]->action.num_entries != 0) {
+		tmp_stats = rules[CT_TYPE_POST_CT]->action.entries[0].hw_stats;
+	}
 
 	/* Copy actions */
 	for (j = 0; j < _CT_TYPE_MAX; j++) {
+		u32 csum_updated = 0;
+		u8 ip_proto = 0;
+
 		if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) {
 			struct flow_match_basic match;
 
@@ -571,8 +677,10 @@ static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
 			 * through the subflows and assign the proper subflow to a_rule
 			 */
 			flow_rule_match_basic(rules[j], &match);
-			if (match.mask->ip_proto)
+			if (match.mask->ip_proto) {
 				a_rule->match = rules[j]->match;
+				ip_proto = match.key->ip_proto;
+			}
 		}
 
 		for (i = 0; i < rules[j]->action.num_entries; i++) {
@@ -589,11 +697,32 @@ static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
 			case FLOW_ACTION_CT_METADATA:
 				continue;
 			default:
+				/* nft entry is generated by tc ct, which mangle action do not care
+				 * the stats, inherit the post entry stats to meet the
+				 * flow_action_hw_stats_check.
+				 */
+				if (j == CT_TYPE_NFT) {
+					if (a_in->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE)
+						a_in->hw_stats = tmp_stats;
+					nfp_fl_get_csum_flag(a_in, ip_proto, &csum_updated);
+				}
 				memcpy(&a_rule->action.entries[offset++],
 				       a_in, sizeof(struct flow_action_entry));
 				break;
 			}
 		}
+		/* nft entry have mangle action, but do not have checksum action when do NAT,
+		 * hardware will automatically fix IPv4 and TCP/UDP checksum. so add an csum action
+		 * to meet csum action check.
+		 */
+		if (csum_updated) {
+			struct flow_action_entry *csum_action;
+
+			csum_action = &a_rule->action.entries[offset++];
+			csum_action->id = FLOW_ACTION_CSUM;
+			csum_action->csum_flags = csum_updated;
+			csum_action->hw_stats = tmp_stats;
+		}
 	}
 
 	/* Some actions would have been ignored, so update the num_entries field */
@@ -1191,6 +1320,49 @@ static struct net_device *get_netdev_from_rule(struct flow_rule *rule)
 	return NULL;
 }
 
+static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_action)
+{
+	if (mangle_action->id != FLOW_ACTION_MANGLE)
+		return;
+
+	switch (mangle_action->mangle.htype) {
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+		mangle_action->mangle.val = (__force u32)cpu_to_be32(mangle_action->mangle.val);
+		mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask);
+		return;
+
+	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+		mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val);
+		mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask);
+		return;
+
+	default:
+		return;
+	}
+}
+
+static int nfp_nft_ct_set_flow_flag(struct flow_action_entry *act,
+				    struct nfp_fl_ct_flow_entry *entry)
+{
+	switch (act->id) {
+	case FLOW_ACTION_CT:
+		if (act->ct.action == TCA_CT_ACT_NAT)
+			entry->flags |= NFP_FL_ACTION_DO_NAT;
+		break;
+
+	case FLOW_ACTION_MANGLE:
+		entry->flags |= NFP_FL_ACTION_DO_MANGLE;
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 static struct
 nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
 					 struct net_device *netdev,
@@ -1257,6 +1429,13 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
 
 		new_act = &entry->rule->action.entries[i];
 		memcpy(new_act, act, sizeof(struct flow_action_entry));
+		/* nft entry mangle field is host byte order, need translate to
+		 * network byte order.
+		 */
+		if (is_nft)
+			nfp_nft_ct_translate_mangle_action(new_act);
+
+		nfp_nft_ct_set_flow_flag(new_act, entry);
 		/* Entunnel is a special case, need to allocate and copy
 		 * tunnel info.
 		 */
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index beb6cceff9d8..762c0b36e269 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -103,6 +103,10 @@ enum nfp_nfp_layer_name {
 	_FLOW_PAY_LAYERS_MAX
 };
 
+/* NFP flow entry flags. */
+#define NFP_FL_ACTION_DO_NAT		BIT(0)
+#define NFP_FL_ACTION_DO_MANGLE		BIT(1)
+
 /**
  * struct nfp_fl_ct_flow_entry - Flow entry containing conntrack flow information
  * @cookie:	Flow cookie, same as original TC flow, used as key
@@ -115,6 +119,7 @@ enum nfp_nfp_layer_name {
  * @rule:	Reference to the original TC flow rule
  * @stats:	Used to cache stats for updating
  * @tun_offset: Used to indicate tunnel action offset in action list
+ * @flags:	Used to indicate flow flag like NAT which used by merge.
  */
 struct nfp_fl_ct_flow_entry {
 	unsigned long cookie;
@@ -127,6 +132,7 @@ struct nfp_fl_ct_flow_entry {
 	struct flow_rule *rule;
 	struct flow_stats stats;
 	u8 tun_offset;		// Set to NFP_FL_CT_NO_TUN if no tun
+	u8 flags;
 };
 
 /**
-- 
2.30.2


  parent reply	other threads:[~2022-09-14 16:09 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-14 16:06 [PATCH net-next 0/3] nfp: flower: police validation and ct enhancements Simon Horman
2022-09-14 16:06 ` [PATCH net-next 1/3] nfp: flower: add validation of for police actions which are independent of flows Simon Horman
2022-09-14 16:06 ` Simon Horman [this message]
2022-09-14 16:06 ` [PATCH net-next 3/3] nfp: flower: support vlan action in pre_ct Simon Horman
2022-09-20 20:40 ` [PATCH net-next 0/3] nfp: flower: police validation and ct enhancements patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220914160604.1740282-3-simon.horman@corigine.com \
    --to=simon.horman@corigine.com \
    --cc=davem@davemloft.net \
    --cc=hui.zhou@corigine.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=oss-drivers@corigine.com \
    --cc=pabeni@redhat.com \
    --cc=ziyang.chen@corigine.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.