All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vlad Buslov <vladbu@nvidia.com>
To: <davem@davemloft.net>, <kuba@kernel.org>, <pabeni@redhat.com>,
	<pablo@netfilter.org>
Cc: <netdev@vger.kernel.org>, <netfilter-devel@vger.kernel.org>,
	<jhs@mojatatu.com>, <xiyou.wangcong@gmail.com>,
	<jiri@resnulli.us>, <ozsh@nvidia.com>,
	<marcelo.leitner@gmail.com>, <simon.horman@corigine.com>,
	Vlad Buslov <vladbu@nvidia.com>
Subject: [PATCH net-next v5 2/7] netfilter: flowtable: fixup UDP timeout depending on ct state
Date: Fri, 27 Jan 2023 19:38:40 +0100	[thread overview]
Message-ID: <20230127183845.597861-3-vladbu@nvidia.com> (raw)
In-Reply-To: <20230127183845.597861-1-vladbu@nvidia.com>

Currently, the flow_offload_fixup_ct() function assumes that only replied
UDP connections can be offloaded and hardcodes the UDP_CT_REPLIED timeout
value. Allow users to modify the timeout calculation by implementing a new
flowtable type callback 'timeout', and use the existing algorithm otherwise.

To enable UDP NEW connection offload in the following patches, implement
the 'timeout' callback in flowtable_ct of act_ct, which extracts the actual
connection state from ct->status and sets the timeout according to it.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
---

Notes:
    Changes V3 -> V4:
    
    - Rework the patch to decouple netfilter and act_ct timeout fixup
    algorithms.

 include/net/netfilter/nf_flow_table.h |  6 +++-
 net/netfilter/nf_flow_table_core.c    | 40 +++++++++++++++++++--------
 net/netfilter/nf_flow_table_ip.c      | 17 ++++++------
 net/sched/act_ct.c                    | 35 ++++++++++++++++++++++-
 4 files changed, 76 insertions(+), 22 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index cd982f4a0f50..a3e4b5127ad0 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -61,6 +61,9 @@ struct nf_flowtable_type {
 						  enum flow_offload_tuple_dir dir,
 						  struct nf_flow_rule *flow_rule);
 	void				(*free)(struct nf_flowtable *ft);
+	bool				(*timeout)(struct nf_flowtable *ft,
+						   struct flow_offload *flow,
+						   s32 *val);
 	nf_hookfn			*hook;
 	struct module			*owner;
 };
@@ -278,7 +281,8 @@ void nf_flow_table_cleanup(struct net_device *dev);
 int nf_flow_table_init(struct nf_flowtable *flow_table);
 void nf_flow_table_free(struct nf_flowtable *flow_table);
 
-void flow_offload_teardown(struct flow_offload *flow);
+void flow_offload_teardown(struct nf_flowtable *flow_table,
+			   struct flow_offload *flow);
 
 void nf_flow_snat_port(const struct flow_offload *flow,
 		       struct sk_buff *skb, unsigned int thoff,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 81c26a96c30b..e3eeea349c8d 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -178,28 +178,43 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 	tcp->seen[1].td_maxwin = 0;
 }
 
-static void flow_offload_fixup_ct(struct nf_conn *ct)
+static bool flow_offload_timeout_default(struct nf_conn *ct, s32 *timeout)
 {
 	struct net *net = nf_ct_net(ct);
 	int l4num = nf_ct_protonum(ct);
-	s32 timeout;
 
 	if (l4num == IPPROTO_TCP) {
 		struct nf_tcp_net *tn = nf_tcp_pernet(net);
 
 		flow_offload_fixup_tcp(&ct->proto.tcp);
 
-		timeout = tn->timeouts[ct->proto.tcp.state];
-		timeout -= tn->offload_timeout;
+		*timeout = tn->timeouts[ct->proto.tcp.state];
+		*timeout -= tn->offload_timeout;
 	} else if (l4num == IPPROTO_UDP) {
 		struct nf_udp_net *tn = nf_udp_pernet(net);
 
-		timeout = tn->timeouts[UDP_CT_REPLIED];
-		timeout -= tn->offload_timeout;
+		*timeout = tn->timeouts[UDP_CT_REPLIED];
+		*timeout -= tn->offload_timeout;
 	} else {
-		return;
+		return false;
 	}
 
+	return true;
+}
+
+static void flow_offload_fixup_ct(struct nf_flowtable *flow_table,
+				  struct flow_offload *flow)
+{
+	struct nf_conn *ct = flow->ct;
+	bool needs_fixup;
+	s32 timeout;
+
+	needs_fixup = flow_table->type->timeout ?
+		flow_table->type->timeout(flow_table, flow, &timeout) :
+		flow_offload_timeout_default(ct, &timeout);
+	if (!needs_fixup)
+		return;
+
 	if (timeout < 0)
 		timeout = 0;
 
@@ -348,11 +363,12 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 	flow_offload_free(flow);
 }
 
-void flow_offload_teardown(struct flow_offload *flow)
+void flow_offload_teardown(struct nf_flowtable *flow_table,
+			   struct flow_offload *flow)
 {
 	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
 	set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-	flow_offload_fixup_ct(flow->ct);
+	flow_offload_fixup_ct(flow_table, flow);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);
 
@@ -421,7 +437,7 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
 {
 	if (nf_flow_has_expired(flow) ||
 	    nf_ct_is_dying(flow->ct))
-		flow_offload_teardown(flow);
+		flow_offload_teardown(flow_table, flow);
 
 	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
 		if (test_bit(NF_FLOW_HW, &flow->flags)) {
@@ -569,14 +585,14 @@ static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
 	struct net_device *dev = data;
 
 	if (!dev) {
-		flow_offload_teardown(flow);
+		flow_offload_teardown(flow_table, flow);
 		return;
 	}
 
 	if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
 	    (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
 	     flow->tuplehash[1].tuple.iifidx == dev->ifindex))
-		flow_offload_teardown(flow);
+		flow_offload_teardown(flow_table, flow);
 }
 
 void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 19efba1e51ef..9c97b9994a96 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -18,7 +18,8 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 
-static int nf_flow_state_check(struct flow_offload *flow, int proto,
+static int nf_flow_state_check(struct nf_flowtable *flow_table,
+			       struct flow_offload *flow, int proto,
 			       struct sk_buff *skb, unsigned int thoff)
 {
 	struct tcphdr *tcph;
@@ -28,7 +29,7 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
 
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	if (unlikely(tcph->fin || tcph->rst)) {
-		flow_offload_teardown(flow);
+		flow_offload_teardown(flow_table, flow);
 		return -1;
 	}
 
@@ -373,11 +374,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 
 	iph = (struct iphdr *)(skb_network_header(skb) + offset);
 	thoff = (iph->ihl * 4) + offset;
-	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
+	if (nf_flow_state_check(flow_table, flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
 	if (!nf_flow_dst_check(&tuplehash->tuple)) {
-		flow_offload_teardown(flow);
+		flow_offload_teardown(flow_table, flow);
 		return NF_ACCEPT;
 	}
 
@@ -419,7 +420,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	case FLOW_OFFLOAD_XMIT_DIRECT:
 		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
 		if (ret == NF_DROP)
-			flow_offload_teardown(flow);
+			flow_offload_teardown(flow_table, flow);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -639,11 +640,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 
 	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
 	thoff = sizeof(*ip6h) + offset;
-	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
+	if (nf_flow_state_check(flow_table, flow, ip6h->nexthdr, skb, thoff))
 		return NF_ACCEPT;
 
 	if (!nf_flow_dst_check(&tuplehash->tuple)) {
-		flow_offload_teardown(flow);
+		flow_offload_teardown(flow_table, flow);
 		return NF_ACCEPT;
 	}
 
@@ -684,7 +685,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	case FLOW_OFFLOAD_XMIT_DIRECT:
 		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
 		if (ret == NF_DROP)
-			flow_offload_teardown(flow);
+			flow_offload_teardown(flow_table, flow);
 		break;
 	default:
 		WARN_ON_ONCE(1);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 0ca2bb8ed026..861305c9c079 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -274,8 +274,41 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
 	return err;
 }
 
+static bool tcf_ct_flow_table_get_timeout(struct nf_flowtable *ft,
+					  struct flow_offload *flow,
+					  s32 *val)
+{
+	struct nf_conn *ct = flow->ct;
+	int l4num =
+		nf_ct_protonum(ct);
+	struct net *net =
+		nf_ct_net(ct);
+
+	if (l4num == IPPROTO_TCP) {
+		struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+		ct->proto.tcp.seen[0].td_maxwin = 0;
+		ct->proto.tcp.seen[1].td_maxwin = 0;
+		*val = tn->timeouts[ct->proto.tcp.state];
+		*val -= tn->offload_timeout;
+	} else if (l4num == IPPROTO_UDP) {
+		struct nf_udp_net *tn = nf_udp_pernet(net);
+		enum udp_conntrack state =
+			test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+			UDP_CT_REPLIED : UDP_CT_UNREPLIED;
+
+		*val = tn->timeouts[state];
+		*val -= tn->offload_timeout;
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
 static struct nf_flowtable_type flowtable_ct = {
 	.action		= tcf_ct_flow_table_fill_actions,
+	.timeout	= tcf_ct_flow_table_get_timeout,
 	.owner		= THIS_MODULE,
 };
 
@@ -622,7 +655,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
 	ct = flow->ct;
 
 	if (tcph && (unlikely(tcph->fin || tcph->rst))) {
-		flow_offload_teardown(flow);
+		flow_offload_teardown(nf_ft, flow);
 		return false;
 	}
 
-- 
2.38.1


  parent reply	other threads:[~2023-01-27 18:39 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-27 18:38 [PATCH net-next v5 0/7] Allow offloading of UDP NEW connections via act_ct Vlad Buslov
2023-01-27 18:38 ` [PATCH net-next v5 1/7] net: flow_offload: provision conntrack info in ct_metadata Vlad Buslov
2023-01-27 18:38 ` Vlad Buslov [this message]
2023-01-28 15:27   ` [PATCH net-next v5 2/7] netfilter: flowtable: fixup UDP timeout depending on ct state Pablo Neira Ayuso
2023-01-28 16:03     ` Vlad Buslov
2023-01-28 19:09       ` Pablo Neira Ayuso
2023-01-28 19:30         ` Vlad Buslov
2023-01-27 18:38 ` [PATCH net-next v5 3/7] netfilter: flowtable: allow unidirectional rules Vlad Buslov
2023-01-27 18:38 ` [PATCH net-next v5 4/7] netfilter: flowtable: save ctinfo in flow_offload Vlad Buslov
2023-01-27 18:38 ` [PATCH net-next v5 5/7] net/sched: act_ct: set ctinfo in meta action depending on ct state Vlad Buslov
2023-01-27 18:38 ` [PATCH net-next v5 6/7] net/sched: act_ct: offload UDP NEW connections Vlad Buslov
2023-01-28 15:26   ` Pablo Neira Ayuso
2023-01-28 15:31     ` Vlad Buslov
2023-01-28 19:09       ` Pablo Neira Ayuso
2023-01-28 19:28         ` Vlad Buslov
2023-01-27 18:38 ` [PATCH net-next v5 7/7] netfilter: nf_conntrack: allow early drop of offloaded UDP conns Vlad Buslov
2023-01-28 15:51 ` [PATCH net-next v5 0/7] Allow offloading of UDP NEW connections via act_ct Pablo Neira Ayuso
2023-01-28 16:04   ` Vlad Buslov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230127183845.597861-3-vladbu@nvidia.com \
    --to=vladbu@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=kuba@kernel.org \
    --cc=marcelo.leitner@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=ozsh@nvidia.com \
    --cc=pabeni@redhat.com \
    --cc=pablo@netfilter.org \
    --cc=simon.horman@corigine.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.