All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments
@ 2020-07-16 19:12 Priyaranjan Jha
  2020-07-16 19:12 ` [PATCH net-next 1/2] tcp: fix segment accounting when DSACK range covers " Priyaranjan Jha
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Priyaranjan Jha @ 2020-07-16 19:12 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Priyaranjan Jha

Currently, while processing DSACK, we assume DSACK covers only one
segment. This leads to significant underestimation of no. of duplicate
segments with LRO/GRO. Also, the existing SNMP counters, TCPDSACKRecv
and TCPDSACKOfoRecv, make similar assumption for DSACK, which makes them
unusable for estimating spurious retransmit rates.

This patch series fixes the segment accounting with DSACK, by estimating
number of duplicate segments based on: (DSACKed sequence range) / MSS.
It also introduces a new SNMP counter, TCPDSACKRecvSegs, which tracks
the estimated number of duplicate segments.

Priyaranjan Jha (2):
  tcp: fix segment accounting when DSACK range covers multiple segments
  tcp: add SNMP counter for no. of duplicate segments reported by DSACK

 include/uapi/linux/snmp.h |  1 +
 net/ipv4/proc.c           |  1 +
 net/ipv4/tcp_input.c      | 81 ++++++++++++++++++++++-----------------
 3 files changed, 47 insertions(+), 36 deletions(-)

-- 
2.27.0.389.gc38d7665816-goog


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH net-next 1/2] tcp: fix segment accounting when DSACK range covers multiple segments
  2020-07-16 19:12 [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments Priyaranjan Jha
@ 2020-07-16 19:12 ` Priyaranjan Jha
  2020-07-16 19:12 ` [PATCH net-next 2/2] tcp: add SNMP counter for no. of duplicate segments reported by DSACK Priyaranjan Jha
  2020-07-17 19:54 ` [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: Priyaranjan Jha @ 2020-07-16 19:12 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Priyaranjan Jha, Neal Cardwell, Yuchung Cheng,
	Soheil Hassas Yeganeh, Eric Dumazet, Yousuk Seung

Currently, while processing DSACK, we assume DSACK covers only one
segment. This leads to significant underestimation of DSACKs with
LRO/GRO. This patch fixes segment accounting with DSACK by estimating
segment count from DSACK sequence range / MSS.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yousuk Seung <ysseung@google.com>
---
 net/ipv4/tcp_input.c | 80 ++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b03ca68d4111..5d6bbcb1e570 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -871,12 +871,41 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
+struct tcp_sacktag_state {
+	/* Timestamps for earliest and latest never-retransmitted segment
+	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
+	 * but congestion control should still get an accurate delay signal.
+	 */
+	u64	first_sackt;
+	u64	last_sackt;
+	u32	reord;
+	u32	sack_delivered;
+	int	flag;
+	unsigned int mss_now;
+	struct rate_sample *rate;
+};
+
 /* Take a notice that peer is sending D-SACKs */
-static void tcp_dsack_seen(struct tcp_sock *tp)
+static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
+			  u32 end_seq, struct tcp_sacktag_state *state)
 {
+	u32 seq_len, dup_segs = 1;
+
+	if (before(start_seq, end_seq)) {
+		seq_len = end_seq - start_seq;
+		if (seq_len > tp->mss_cache)
+			dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
+	}
+
 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
 	tp->rack.dsack_seen = 1;
-	tp->dsack_dups++;
+	tp->dsack_dups += dup_segs;
+
+	state->flag |= FLAG_DSACKING_ACK;
+	/* A spurious retransmission is delivered */
+	state->sack_delivered += dup_segs;
+
+	return dup_segs;
 }
 
 /* It's reordering when higher sequence was delivered (i.e. sacked) before
@@ -1103,53 +1132,37 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
 
 static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 			    struct tcp_sack_block_wire *sp, int num_sacks,
-			    u32 prior_snd_una)
+			    u32 prior_snd_una, struct tcp_sacktag_state *state)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
 	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
-	bool dup_sack = false;
+	u32 dup_segs;
 
 	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
-		dup_sack = true;
-		tcp_dsack_seen(tp);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
 	} else if (num_sacks > 1) {
 		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
 		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
 
-		if (!after(end_seq_0, end_seq_1) &&
-		    !before(start_seq_0, start_seq_1)) {
-			dup_sack = true;
-			tcp_dsack_seen(tp);
-			NET_INC_STATS(sock_net(sk),
-					LINUX_MIB_TCPDSACKOFORECV);
-		}
+		if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
+			return false;
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
+	} else {
+		return false;
 	}
 
+	dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
+
 	/* D-SACK for already forgotten data... Do dumb counting. */
-	if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
+	if (tp->undo_marker && tp->undo_retrans > 0 &&
 	    !after(end_seq_0, prior_snd_una) &&
 	    after(end_seq_0, tp->undo_marker))
-		tp->undo_retrans--;
+		tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);
 
-	return dup_sack;
+	return true;
 }
 
-struct tcp_sacktag_state {
-	u32	reord;
-	/* Timestamps for earliest and latest never-retransmitted segment
-	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
-	 * but congestion control should still get an accurate delay signal.
-	 */
-	u64	first_sackt;
-	u64	last_sackt;
-	struct rate_sample *rate;
-	int	flag;
-	unsigned int mss_now;
-	u32	sack_delivered;
-};
-
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
  * the incoming SACK may not exactly match but we can find smaller MSS
  * aligned portion of it that matches. Therefore we might need to fragment
@@ -1692,12 +1705,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		tcp_highest_sack_reset(sk);
 
 	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
-					 num_sacks, prior_snd_una);
-	if (found_dup_sack) {
-		state->flag |= FLAG_DSACKING_ACK;
-		/* A spurious retransmission is delivered */
-		state->sack_delivered++;
-	}
+					 num_sacks, prior_snd_una, state);
 
 	/* Eliminate too old ACKs, but take into
 	 * account more or less fresh ones, they can
-- 
2.27.0.389.gc38d7665816-goog


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH net-next 2/2] tcp: add SNMP counter for no. of duplicate segments reported by DSACK
  2020-07-16 19:12 [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments Priyaranjan Jha
  2020-07-16 19:12 ` [PATCH net-next 1/2] tcp: fix segment accounting when DSACK range covers " Priyaranjan Jha
@ 2020-07-16 19:12 ` Priyaranjan Jha
  2020-07-17 19:54 ` [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: Priyaranjan Jha @ 2020-07-16 19:12 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Priyaranjan Jha, Neal Cardwell, Yuchung Cheng,
	Soheil Hassas Yeganeh

There are two existing SNMP counters, TCPDSACKRecv and TCPDSACKOfoRecv,
which are incremented depending on whether the DSACKed range is below
the cumulative ACK sequence number or not. Unfortunately, these both
implicitly assume each DSACK covers only one segment. This makes these
counters unusable for estimating spurious retransmit rates,
or real/non-spurious loss rate.

This patch introduces a new SNMP counter, TCPDSACKRecvSegs, which tracks
the estimated number of duplicate segments based on:
(DSACKed sequence range) / MSS. This counter is usable for estimating
spurious retransmit rates, or real/non-spurious loss rate.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
---
 include/uapi/linux/snmp.h | 1 +
 net/ipv4/proc.c           | 1 +
 net/ipv4/tcp_input.c      | 1 +
 3 files changed, 3 insertions(+)

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 7d91f4debc48..cee9f8e6fce3 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -287,6 +287,7 @@ enum
 	LINUX_MIB_TCPFASTOPENPASSIVEALTKEY,	/* TCPFastOpenPassiveAltKey */
 	LINUX_MIB_TCPTIMEOUTREHASH,		/* TCPTimeoutRehash */
 	LINUX_MIB_TCPDUPLICATEDATAREHASH,	/* TCPDuplicateDataRehash */
+	LINUX_MIB_TCPDSACKRECVSEGS,		/* TCPDSACKRecvSegs */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 75545a829a2b..1074df726ec0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -292,6 +292,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
 	SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH),
 	SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
+	SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5d6bbcb1e570..82906deb7874 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1153,6 +1153,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 	}
 
 	dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
+	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);
 
 	/* D-SACK for already forgotten data... Do dumb counting. */
 	if (tp->undo_marker && tp->undo_retrans > 0 &&
-- 
2.27.0.389.gc38d7665816-goog


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments
  2020-07-16 19:12 [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments Priyaranjan Jha
  2020-07-16 19:12 ` [PATCH net-next 1/2] tcp: fix segment accounting when DSACK range covers " Priyaranjan Jha
  2020-07-16 19:12 ` [PATCH net-next 2/2] tcp: add SNMP counter for no. of duplicate segments reported by DSACK Priyaranjan Jha
@ 2020-07-17 19:54 ` David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2020-07-17 19:54 UTC (permalink / raw)
  To: priyarjha; +Cc: netdev

From: Priyaranjan Jha <priyarjha@google.com>
Date: Thu, 16 Jul 2020 12:12:33 -0700

> Currently, while processing DSACK, we assume DSACK covers only one
> segment. This leads to significant underestimation of no. of duplicate
> segments with LRO/GRO. Also, the existing SNMP counters, TCPDSACKRecv
> and TCPDSACKOfoRecv, make similar assumption for DSACK, which makes them
> unusable for estimating spurious retransmit rates.
> 
> This patch series fixes the segment accounting with DSACK, by estimating
> number of duplicate segments based on: (DSACKed sequence range) / MSS.
> It also introduces a new SNMP counter, TCPDSACKRecvSegs, which tracks
> the estimated number of duplicate segments.

Series applied, thank you.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-07-17 19:54 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-16 19:12 [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments Priyaranjan Jha
2020-07-16 19:12 ` [PATCH net-next 1/2] tcp: fix segment accounting when DSACK range covers " Priyaranjan Jha
2020-07-16 19:12 ` [PATCH net-next 2/2] tcp: add SNMP counter for no. of duplicate segments reported by DSACK Priyaranjan Jha
2020-07-17 19:54 ` [PATCH net-next 0/2] tcp: improve handling of DSACK covering multiple segments David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.