Stable Archive on lore.kernel.org
 help / color / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Yuchung Cheng <ycheng@google.com>,
	Neal Cardwell <ncardwell@google.com>,
	Eric Dumazet <edumazet@google.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 5.7 12/20] tcp: allow at most one TLP probe per flight
Date: Thu, 30 Jul 2020 10:04:02 +0200
Message-ID: <20200730074421.120670127@linuxfoundation.org> (raw)
In-Reply-To: <20200730074420.533211699@linuxfoundation.org>

From: Yuchung Cheng <ycheng@google.com>

[ Upstream commit 76be93fc0702322179bb0ea87295d820ee46ad14 ]

Previously TLP could send multiple probes of new data in one
flight. This happens when the sender is cwnd-limited. After the
initial TLP containing new data is sent, the sender receives another
ACK that acknowledges only part of the data in flight. It may then
re-arm another TLP timer and send more if no further ACK returns
before the next TLP timeout (PTO) expires. In theory the sender may
send a large number of TLPs until the send queue is depleted. This
only happens if the sender sees such an irregular, uncommon ACK
pattern, but it is generally undesirable behavior, especially during
congestion.

The original TLP design, as published in "Reducing Web Latency: the
Virtue of Gentle Aggression", SIGCOMM 2013, allows only one TLP probe
per flight. This patch changes TLP to send at most one probe per
flight.

Note that if the sender is app-limited, TLP retransmits old data
and does not have this issue.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tcp.h   |    4 +++-
 net/ipv4/tcp_input.c  |   11 ++++++-----
 net/ipv4/tcp_output.c |   13 ++++++++-----
 3 files changed, 17 insertions(+), 11 deletions(-)

--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -217,6 +217,8 @@ struct tcp_sock {
 	} rack;
 	u16	advmss;		/* Advertised MSS			*/
 	u8	compressed_ack;
+	u8	tlp_retrans:1,	/* TLP is a retransmission */
+		unused:7;
 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
 	u8	chrono_type:2,	/* current chronograph type */
@@ -239,7 +241,7 @@ struct tcp_sock {
 		save_syn:1,	/* Save headers of SYN packet */
 		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
 		syn_smc:1;	/* SYN includes SMC */
-	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
+	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */
 
 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */
 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3506,10 +3506,8 @@ static void tcp_replace_ts_recent(struct
 	}
 }
 
-/* This routine deals with acks during a TLP episode.
- * We mark the end of a TLP episode on receiving TLP dupack or when
- * ack is after tlp_high_seq.
- * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
+/* This routine deals with acks during a TLP episode and ends an episode by
+ * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
  */
 static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
 {
@@ -3518,7 +3516,10 @@ static void tcp_process_tlp_ack(struct s
 	if (before(ack, tp->tlp_high_seq))
 		return;
 
-	if (flag & FLAG_DSACKING_ACK) {
+	if (!tp->tlp_retrans) {
+		/* TLP of new data has been acknowledged */
+		tp->tlp_high_seq = 0;
+	} else if (flag & FLAG_DSACKING_ACK) {
 		/* This DSACK means original and TLP probe arrived; no loss */
 		tp->tlp_high_seq = 0;
 	} else if (after(ack, tp->tlp_high_seq)) {
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2625,6 +2625,11 @@ void tcp_send_loss_probe(struct sock *sk
 	int pcount;
 	int mss = tcp_current_mss(sk);
 
+	/* At most one outstanding TLP */
+	if (tp->tlp_high_seq)
+		goto rearm_timer;
+
+	tp->tlp_retrans = 0;
 	skb = tcp_send_head(sk);
 	if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
 		pcount = tp->packets_out;
@@ -2642,10 +2647,6 @@ void tcp_send_loss_probe(struct sock *sk
 		return;
 	}
 
-	/* At most one outstanding TLP retransmission. */
-	if (tp->tlp_high_seq)
-		goto rearm_timer;
-
 	if (skb_still_in_host_queue(sk, skb))
 		goto rearm_timer;
 
@@ -2667,10 +2668,12 @@ void tcp_send_loss_probe(struct sock *sk
 	if (__tcp_retransmit_skb(sk, skb, 1))
 		goto rearm_timer;
 
+	tp->tlp_retrans = 1;
+
+probe_sent:
 	/* Record snd_nxt for loss detection. */
 	tp->tlp_high_seq = tp->snd_nxt;
 
-probe_sent:
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
 	/* Reset s.t. tcp_rearm_rto will restart timer from now */
 	inet_csk(sk)->icsk_pending = 0;



  parent reply index

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-30  8:03 [PATCH 5.7 00/20] 5.7.12-rc1 review Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 01/20] AX.25: Fix out-of-bounds read in ax25_connect() Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 02/20] AX.25: Prevent out-of-bounds read in ax25_sendmsg() Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 03/20] dev: Defer free of skbs in flush_backlog Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 04/20] drivers/net/wan/x25_asy: Fix to make it work Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 05/20] ip6_gre: fix null-ptr-deref in ip6gre_init_net() Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 06/20] net/sched: act_ct: fix restore the qdisc_skb_cb after defrag Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 07/20] net-sysfs: add a newline when printing tx_timeout by sysfs Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 08/20] net: udp: Fix wrong clean up for IS_UDPLITE macro Greg Kroah-Hartman
2020-07-30  8:03 ` [PATCH 5.7 09/20] qrtr: orphan socket in qrtr_release() Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 10/20] rtnetlink: Fix memory(net_device) leak when ->newlink fails Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 11/20] rxrpc: Fix sendmsg() returning EPIPE due to recvmsg() returning ENODATA Greg Kroah-Hartman
2020-07-30  8:04 ` Greg Kroah-Hartman [this message]
2020-07-30  8:04 ` [PATCH 5.7 13/20] AX.25: Prevent integer overflows in connect and sendmsg Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 14/20] sctp: shrink stream outq only when new outcnt < old outcnt Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 15/20] sctp: shrink stream outq when fails to do addstream reconf Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 16/20] udp: Copy has_conns in reuseport_grow() Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 17/20] udp: Improve load balancing for SO_REUSEPORT Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 18/20] tipc: allow to build NACK message in link timeout function Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 19/20] io_uring: ensure double poll additions work with both request types Greg Kroah-Hartman
2020-07-30  8:04 ` [PATCH 5.7 20/20] regmap: debugfs: check count when read regmap file Greg Kroah-Hartman
2020-07-30 16:48 ` [PATCH 5.7 00/20] 5.7.12-rc1 review Guenter Roeck
2020-07-31 17:15   ` Greg Kroah-Hartman
2020-07-31  8:59 ` Naresh Kamboju
2020-07-31 12:53 ` Jon Hunter
2020-07-31 17:15   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200730074421.120670127@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ncardwell@google.com \
    --cc=stable@vger.kernel.org \
    --cc=ycheng@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Stable Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ https://lore.kernel.org/stable \
		stable@vger.kernel.org
	public-inbox-index stable

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.stable


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git