All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH nf-next 0/3] netfilter: conntrack: ignore overly delayed tcp packets
@ 2022-07-20 17:52 Florian Westphal
  2022-07-20 17:52 ` [PATCH nf-next 1/3] netfilter: conntrack: prepare tcp_in_window for ternary return value Florian Westphal
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Florian Westphal @ 2022-07-20 17:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kadlec, Florian Westphal

Consider following ruleset:
... ct state new accept
... ct state invalid drop

Normally a tcp receiver will reply with an ack once it receives
a delayed packet. Example:

+0.0001 < P. 1:1461(1460) ack 1 win 257
+.0 > . 1:1(0) ack 1461 win 65535
+0.0001 < P. 1461:2921(1460) ack 1 win 257
[..]
+0.0001 < P. 65701:67161(1460) ack 1 win 257
+.0 > . 1:1(0) ack 67161 win 65535 // all data received

// delayed packet, already acked
+0.0001 < P. 1:1461(1460) ack 1 win 257

// nf_ct_proto_6: SEQ is under the lower bound (already ACKed data retransmitted) IN=.. SEQ=1 ACK=4162510439 WINDOW=257 ACK PSHR
+.0 > . 1:1(0) ack 67161 win 65535

If the delayed packet is not dropped, the receiver can
immediately send another ack, but this doesn't happen if
INVALID packets are dropped by the ruleset (which is a common thing to do).

This changes conntrack to treat such packets as valid, with the
caveat that they will not extend the tcp timeout or cause state
changes.

Ideally we could augment state matching so that this decision
is pushe to the ruleset but so far I don't see how this could be done
with the limited space we have in sk_buff (except for yet another skb
extension, but that appears to be too much for such a narrow use case).

Florian Westphal (3):
  netfilter: conntrack: prepare tcp_in_window for ternary return value
  netfilter: conntrack: ignore overly delayed tcp packets
  netfilter: conntrack: remove unneeded indent level

 net/netfilter/nf_conntrack_proto_tcp.c | 208 ++++++++++++++-----------
 1 file changed, 116 insertions(+), 92 deletions(-)

-- 
2.35.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH nf-next 1/3] netfilter: conntrack: prepare tcp_in_window for ternary return value
  2022-07-20 17:52 [PATCH nf-next 0/3] netfilter: conntrack: ignore overly delayed tcp packets Florian Westphal
@ 2022-07-20 17:52 ` Florian Westphal
  2022-07-20 17:52 ` [PATCH nf-next 2/3] netfilter: conntrack: ignore overly delayed tcp packets Florian Westphal
  2022-07-20 17:52 ` [PATCH nf-next 3/3] netfilter: conntrack: remove unneeded indent level Florian Westphal
  2 siblings, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2022-07-20 17:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kadlec, Florian Westphal

tcp_in_window returns true if the packet is in window and false if it is
not.

If its outside of window, packet will be treated as INVALID.

This can happen during normal operation, e.g. when a packet is
reordered/delayed and a retransmit repaired the window in mean time.

Rulesets may drop or log such packets, which creates useless noise.
There is currently no way to distinguish different 'reasons' for refusal
to track the packet.

Prepare tcp_in_window() to return an enum that tells the desired
action (in-window/accept, bogus/drop).

A third action (accept the packet as in-window, but do not change
state) is added in a followup patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 74 +++++++++++++++++++-------
 1 file changed, 56 insertions(+), 18 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index a63b51dceaf2..01dc34ead0a4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -47,6 +47,11 @@ static const char *const tcp_conntrack_names[] = {
 	"SYN_SENT2",
 };
 
+enum nf_ct_tcp_action {
+	NFCT_TCP_INVALID,
+	NFCT_TCP_ACCEPT,
+};
+
 #define SECS * HZ
 #define MINS * 60 SECS
 #define HOURS * 60 MINS
@@ -472,13 +477,37 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
 	}
 }
 
-static bool tcp_in_window(struct nf_conn *ct,
-			  enum ip_conntrack_dir dir,
-			  unsigned int index,
-			  const struct sk_buff *skb,
-			  unsigned int dataoff,
-			  const struct tcphdr *tcph,
-			  const struct nf_hook_state *hook_state)
+__printf(6, 7)
+static enum nf_ct_tcp_action nf_tcp_log_invalid(const struct sk_buff *skb,
+						const struct nf_conn *ct,
+						const struct nf_hook_state *state,
+						const struct ip_ct_tcp_state *sender,
+						enum nf_ct_tcp_action ret,
+						const char *fmt, ...)
+{
+	const struct nf_tcp_net *tn = nf_tcp_pernet(nf_ct_net(ct));
+	struct va_format vaf;
+	va_list args;
+	bool be_liberal;
+
+	be_liberal = sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal;
+	if (be_liberal)
+		return NFCT_TCP_ACCEPT;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	nf_ct_l4proto_log_invalid(skb, ct, state, "%pV", &vaf);
+	va_end(args);
+
+	return ret;
+}
+
+static enum nf_ct_tcp_action
+tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
+	      unsigned int index, const struct sk_buff *skb,
+	      unsigned int dataoff, const struct tcphdr *tcph,
+	      const struct nf_hook_state *hook_state)
 {
 	struct ip_ct_tcp *state = &ct->proto.tcp;
 	struct net *net = nf_ct_net(ct);
@@ -486,9 +515,9 @@ static bool tcp_in_window(struct nf_conn *ct,
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	__u32 seq, ack, sack, end, win, swin;
-	u16 win_raw;
+	bool in_recv_win, seq_ok;
 	s32 receiver_offset;
-	bool res, in_recv_win;
+	u16 win_raw;
 
 	/*
 	 * Get the required data from the packet.
@@ -517,7 +546,7 @@ static bool tcp_in_window(struct nf_conn *ct,
 					end, win);
 			if (!tcph->ack)
 				/* Simultaneous open */
-				return true;
+				return NFCT_TCP_ACCEPT;
 		} else {
 			/*
 			 * We are in the middle of a connection,
@@ -584,13 +613,22 @@ static bool tcp_in_window(struct nf_conn *ct,
 		 */
 		seq = end = sender->td_end;
 
+	seq_ok = before(seq, sender->td_maxend + 1);
+	if (!seq_ok)
+		return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID,
+					  "SEQ is over upper bound %u (over the window of the receiver)",
+					  sender->td_maxend + 1);
+
+	if (!before(sack, receiver->td_end + 1))
+		return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID,
+					  "ACK is over upper bound %u (ACKed data not seen yet)",
+					  receiver->td_end + 1);
+
 	/* Is the ending sequence in the receive window (if available)? */
 	in_recv_win = !receiver->td_maxwin ||
 		      after(end, sender->td_end - receiver->td_maxwin - 1);
 
-	if (before(seq, sender->td_maxend + 1) &&
-	    in_recv_win &&
-	    before(sack, receiver->td_end + 1) &&
+	if (in_recv_win &&
 	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 		/*
 		 * Take into account window scaling (RFC 1323).
@@ -648,13 +686,11 @@ static bool tcp_in_window(struct nf_conn *ct,
 				state->retrans = 0;
 			}
 		}
-		res = true;
 	} else {
-		res = false;
 		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
 		    tn->tcp_be_liberal)
-			res = true;
-		if (!res) {
+			return NFCT_TCP_ACCEPT;
+		{
 			nf_ct_l4proto_log_invalid(skb, ct, hook_state,
 			"%s",
 			before(seq, sender->td_maxend + 1) ?
@@ -666,9 +702,11 @@ static bool tcp_in_window(struct nf_conn *ct,
 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
 			: "SEQ is over the upper bound (over the window of the receiver)");
 		}
+
+		return NFCT_TCP_INVALID;
 	}
 
-	return res;
+	return NFCT_TCP_ACCEPT;
 }
 
 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH nf-next 2/3] netfilter: conntrack: ignore overly delayed tcp packets
  2022-07-20 17:52 [PATCH nf-next 0/3] netfilter: conntrack: ignore overly delayed tcp packets Florian Westphal
  2022-07-20 17:52 ` [PATCH nf-next 1/3] netfilter: conntrack: prepare tcp_in_window for ternary return value Florian Westphal
@ 2022-07-20 17:52 ` Florian Westphal
  2022-07-20 17:52 ` [PATCH nf-next 3/3] netfilter: conntrack: remove unneeded indent level Florian Westphal
  2 siblings, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2022-07-20 17:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kadlec, Florian Westphal

If 'nf_conntrack_tcp_loose' is off (the default), tcp packets that are
outside of the current window are marked as INVALID.

nf/iptables rulesets often drop such packets via 'ct state invalid' or
similar checks.  In addition, they may log suck an event.

Since they are not invalid in a strict sense, just ignore them.

Conntrack won't extend timeout or change state for such packets but
they will still be treated as being part of the connection.

So, 'state invalid' rules won't match them anymore.
This allows the receiver to immediately respond with an ack.
This doesn't happen as-is if invalid packets get discarded.

The else branch of the conditional becomes obsolete.
Next patch will reformant the now always-true if condition.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 45 ++++++++++++--------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 01dc34ead0a4..c72588382421 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -48,6 +48,7 @@ static const char *const tcp_conntrack_names[] = {
 };
 
 enum nf_ct_tcp_action {
+	NFCT_TCP_IGNORE,
 	NFCT_TCP_INVALID,
 	NFCT_TCP_ACCEPT,
 };
@@ -510,8 +511,6 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	      const struct nf_hook_state *hook_state)
 {
 	struct ip_ct_tcp *state = &ct->proto.tcp;
-	struct net *net = nf_ct_net(ct);
-	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	__u32 seq, ack, sack, end, win, swin;
@@ -627,9 +626,17 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	/* Is the ending sequence in the receive window (if available)? */
 	in_recv_win = !receiver->td_maxwin ||
 		      after(end, sender->td_end - receiver->td_maxwin - 1);
+	if (!in_recv_win)
+		return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE,
+					  "SEQ is under lower bound %u (already ACKed data retransmitted)",
+					  sender->td_end - receiver->td_maxwin - 1);
+	if (!after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1))
+		return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE,
+					  "ignored ACK under lower bound %u (possible overly delayed)",
+					  receiver->td_end - MAXACKWINDOW(sender) - 1);
+
 
-	if (in_recv_win &&
-	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
+	if (1) {
 		/*
 		 * Take into account window scaling (RFC 1323).
 		 */
@@ -686,24 +693,6 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
 				state->retrans = 0;
 			}
 		}
-	} else {
-		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
-		    tn->tcp_be_liberal)
-			return NFCT_TCP_ACCEPT;
-		{
-			nf_ct_l4proto_log_invalid(skb, ct, hook_state,
-			"%s",
-			before(seq, sender->td_maxend + 1) ?
-			in_recv_win ?
-			before(sack, receiver->td_end + 1) ?
-			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
-			: "ACK is under the lower bound (possible overly delayed ACK)"
-			: "ACK is over the upper bound (ACKed data not seen yet)"
-			: "SEQ is under the lower bound (already ACKed data retransmitted)"
-			: "SEQ is over the upper bound (over the window of the receiver)");
-		}
-
-		return NFCT_TCP_INVALID;
 	}
 
 	return NFCT_TCP_ACCEPT;
@@ -868,6 +857,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
 	struct nf_conntrack_tuple *tuple;
 	enum tcp_conntrack new_state, old_state;
 	unsigned int index, *timeouts;
+	enum nf_ct_tcp_action res;
 	enum ip_conntrack_dir dir;
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
@@ -1133,10 +1123,17 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
 		break;
 	}
 
-	if (!tcp_in_window(ct, dir, index,
-			   skb, dataoff, th, state)) {
+	res = tcp_in_window(ct, dir, index,
+			    skb, dataoff, th, state);
+	switch (res) {
+	case NFCT_TCP_IGNORE:
+		spin_unlock_bh(&ct->lock);
+		return NF_ACCEPT;
+	case NFCT_TCP_INVALID:
 		spin_unlock_bh(&ct->lock);
 		return -NF_ACCEPT;
+	case NFCT_TCP_ACCEPT:
+		break;
 	}
      in_window:
 	/* From now on we have got in-window packets */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH nf-next 3/3] netfilter: conntrack: remove unneeded indent level
  2022-07-20 17:52 [PATCH nf-next 0/3] netfilter: conntrack: ignore overly delayed tcp packets Florian Westphal
  2022-07-20 17:52 ` [PATCH nf-next 1/3] netfilter: conntrack: prepare tcp_in_window for ternary return value Florian Westphal
  2022-07-20 17:52 ` [PATCH nf-next 2/3] netfilter: conntrack: ignore overly delayed tcp packets Florian Westphal
@ 2022-07-20 17:52 ` Florian Westphal
  2 siblings, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2022-07-20 17:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kadlec, Florian Westphal

After previous patch, the conditional branch is obsolete, reformat it.
gcc generates same code as before this change.

Also includes cleanups to pacify checkpatch.pl.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 100 +++++++++++--------------
 1 file changed, 45 insertions(+), 55 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index c72588382421..e1e354ea62ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -635,63 +635,53 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
 					  "ignored ACK under lower bound %u (possible overly delayed)",
 					  receiver->td_end - MAXACKWINDOW(sender) - 1);
 
-
-	if (1) {
-		/*
-		 * Take into account window scaling (RFC 1323).
-		 */
-		if (!tcph->syn)
-			win <<= sender->td_scale;
-
-		/*
-		 * Update sender data.
-		 */
-		swin = win + (sack - ack);
-		if (sender->td_maxwin < swin)
-			sender->td_maxwin = swin;
-		if (after(end, sender->td_end)) {
-			sender->td_end = end;
-			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
-		}
-		if (tcph->ack) {
-			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
-				sender->td_maxack = ack;
-				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
-			} else if (after(ack, sender->td_maxack))
-				sender->td_maxack = ack;
-		}
-
-		/*
-		 * Update receiver data.
-		 */
-		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
-			receiver->td_maxwin += end - sender->td_maxend;
-		if (after(sack + win, receiver->td_maxend - 1)) {
-			receiver->td_maxend = sack + win;
-			if (win == 0)
-				receiver->td_maxend++;
+	/* Take into account window scaling (RFC 1323). */
+	if (!tcph->syn)
+		win <<= sender->td_scale;
+
+	/* Update sender data. */
+	swin = win + (sack - ack);
+	if (sender->td_maxwin < swin)
+		sender->td_maxwin = swin;
+	if (after(end, sender->td_end)) {
+		sender->td_end = end;
+		sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+	}
+	if (tcph->ack) {
+		if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
+			sender->td_maxack = ack;
+			sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
+		} else if (after(ack, sender->td_maxack)) {
+			sender->td_maxack = ack;
 		}
-		if (ack == receiver->td_end)
-			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+	}
 
-		/*
-		 * Check retransmissions.
-		 */
-		if (index == TCP_ACK_SET) {
-			if (state->last_dir == dir
-			    && state->last_seq == seq
-			    && state->last_ack == ack
-			    && state->last_end == end
-			    && state->last_win == win_raw)
-				state->retrans++;
-			else {
-				state->last_dir = dir;
-				state->last_seq = seq;
-				state->last_ack = ack;
-				state->last_end = end;
-				state->last_win = win_raw;
-				state->retrans = 0;
-			}
+	/* Update receiver data. */
+	if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
+		receiver->td_maxwin += end - sender->td_maxend;
+	if (after(sack + win, receiver->td_maxend - 1)) {
+		receiver->td_maxend = sack + win;
+		if (win == 0)
+			receiver->td_maxend++;
+	}
+	if (ack == receiver->td_end)
+		receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+
+	/* Check retransmissions. */
+	if (index == TCP_ACK_SET) {
+		if (state->last_dir == dir &&
+		    state->last_seq == seq &&
+		    state->last_ack == ack &&
+		    state->last_end == end &&
+		    state->last_win == win_raw) {
+			state->retrans++;
+		} else {
+			state->last_dir = dir;
+			state->last_seq = seq;
+			state->last_ack = ack;
+			state->last_end = end;
+			state->last_win = win_raw;
+			state->retrans = 0;
 		}
 	}
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-07-20 17:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-20 17:52 [PATCH nf-next 0/3] netfilter: conntrack: ignore overly delayed tcp packets Florian Westphal
2022-07-20 17:52 ` [PATCH nf-next 1/3] netfilter: conntrack: prepare tcp_in_window for ternary return value Florian Westphal
2022-07-20 17:52 ` [PATCH nf-next 2/3] netfilter: conntrack: ignore overly delayed tcp packets Florian Westphal
2022-07-20 17:52 ` [PATCH nf-next 3/3] netfilter: conntrack: remove unneeded indent level Florian Westphal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.