netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling
@ 2013-07-22 23:20 Yuchung Cheng
  2013-07-22 23:20 ` [PATCH v2 net-next 2/4] tcp: prefer packet timing to TS-ECR for RTT Yuchung Cheng
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Yuchung Cheng @ 2013-07-22 23:20 UTC (permalink / raw)
  To: davem, ncardwell, edumazet, nanditad; +Cc: netdev, Yuchung Cheng

This patch series improve RTT sampling in three ways:
1. Sample RTT during fast recovery and reordering events.
2. Favor ack-based RTT to timestamps because of broken TS ECR fields
3. Consolidate the RTT measurement logic.

The first patch consolidates SYNACK and other RTT measurement to use a
central function tcp_ack_update_rtt(). A (small) bonus is now SYNACK
RTT measurement happens after PAWS check, potentially reducing the
impact of RTO seeding on bad TCP timestamps values.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
ChangeLog in v2:
	fix not calling tcp_synack_rtt_meas on regular TCP

 include/net/tcp.h        |  9 ---------
 net/ipv4/tcp_input.c     | 14 ++++++++++++--
 net/ipv4/tcp_ipv4.c      |  2 --
 net/ipv4/tcp_minisocks.c |  8 ++------
 net/ipv6/tcp_ipv6.c      |  2 --
 5 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index d198005..f9777db 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1094,15 +1094,6 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->loc_port = tcp_hdr(skb)->dest;
 }
 
-/* Compute time elapsed between SYNACK and the ACK completing 3WHS */
-static inline void tcp_synack_rtt_meas(struct sock *sk,
-				       struct request_sock *req)
-{
-	if (tcp_rsk(req)->snt_synack)
-		tcp_valid_rtt_meas(sk,
-		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
-}
-
 extern void tcp_enter_memory_pressure(struct sock *sk);
 
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 28af45a..b531710 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2853,6 +2853,17 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 		tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }
 
+/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
+static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	s32 seq_rtt = -1;
+
+	if (tp->lsndtime && !tp->total_retrans)
+		seq_rtt = tcp_time_stamp - tp->lsndtime;
+	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt);
+}
+
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -5624,9 +5635,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * so release it.
 		 */
 		if (req) {
-			tcp_synack_rtt_meas(sk, req);
 			tp->total_retrans = req->num_retrans;
-
 			reqsk_fastopen_remove(sk, req, false);
 		} else {
 			/* Make sure socket is routed, for correct metrics. */
@@ -5651,6 +5660,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+		tcp_synack_rtt_meas(sk, req);
 
 		if (tp->rx_opt.tstamp_ok)
 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b299da5..2e3f129 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1671,8 +1671,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
 
 	tcp_initialize_rcv_mss(newsk);
-	tcp_synack_rtt_meas(newsk, req);
-	newtp->total_retrans = req->num_retrans;
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ab1c086..58a3e69 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -411,6 +411,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 		tcp_enable_early_retrans(newtp);
 		newtp->tlp_high_seq = 0;
+		newtp->lsndtime = treq->snt_synack;
+		newtp->total_retrans = req->num_retrans;
 
 		/* So many TCP implementations out there (incorrectly) count the
 		 * initial SYN frame in their delayed-ACK and congestion control
@@ -666,12 +668,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;
 
-	/* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
-	if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
-		tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
-	else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
-		tcp_rsk(req)->snt_synack = 0;
-
 	/* For Fast Open no more processing is needed (sk is the
 	 * child socket).
 	 */
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6e1649d..80fe69e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1237,8 +1237,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
 
 	tcp_initialize_rcv_mss(newsk);
-	tcp_synack_rtt_meas(newsk, req);
-	newtp->total_retrans = req->num_retrans;
 
 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
-- 
1.8.3

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 net-next 2/4] tcp: prefer packet timing to TS-ECR for RTT
  2013-07-22 23:20 [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling Yuchung Cheng
@ 2013-07-22 23:20 ` Yuchung Cheng
  2013-07-22 23:20 ` [PATCH v2 net-next 3/4] tcp: measure RTT from new SACK Yuchung Cheng
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Yuchung Cheng @ 2013-07-22 23:20 UTC (permalink / raw)
  To: davem, ncardwell, edumazet, nanditad; +Cc: netdev, Yuchung Cheng

Prefer packet timings to TS-ecr for RTT measurements when both
sources are available. That's because broken middle-boxes and remote
peer can return packets with corrupted TS ECR fields. Similarly most
congestion controls that require RTT signals favor timing-based
sources as well. Also check for bad TS ECR values to avoid RTT
blow-ups. It has happened on production Web servers.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
---
 include/net/tcp.h    |  1 -
 net/ipv4/tcp_input.c | 67 ++++++++++++++--------------------------------------
 2 files changed, 18 insertions(+), 50 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f9777db..c586847 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -591,7 +591,6 @@ extern void tcp_initialize_rcv_mss(struct sock *sk);
 extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 extern int tcp_mss_to_mtu(struct sock *sk, int mss);
 extern void tcp_mtup_init(struct sock *sk);
-extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
 extern void tcp_init_buffer_space(struct sock *sk);
 
 static inline void tcp_bound_rto(const struct sock *sk)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b531710..c7398f0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2792,65 +2792,36 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 	tcp_xmit_retransmit_queue(sk);
 }
 
-void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
+static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
+				      s32 seq_rtt)
 {
-	tcp_rtt_estimator(sk, seq_rtt);
-	tcp_set_rto(sk);
-	inet_csk(sk)->icsk_backoff = 0;
-}
-EXPORT_SYMBOL(tcp_valid_rtt_meas);
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Prefer RTT measured from ACK's timing to TS-ECR. This is because
+	 * broken middle-boxes or peers may corrupt TS-ECR fields. But
+	 * Karn's algorithm forbids taking RTT if some retransmitted data
+	 * is acked (RFC6298).
+	 */
+	if (flag & FLAG_RETRANS_DATA_ACKED)
+		seq_rtt = -1;
 
-/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Supersedes RFC1323)
- */
-static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
-{
 	/* RTTM Rule: A TSecr value received in a segment is used to
 	 * update the averaged RTT measurement only if the segment
 	 * acknowledges some new data, i.e., only if it advances the
 	 * left edge of the send window.
-	 *
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
-	 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
-	 *
-	 * Changed: reset backoff as soon as we see the first valid sample.
-	 * If we do not, we get strongly overestimated rto. With timestamps
-	 * samples are accepted even from very old segments: f.e., when rtt=1
-	 * increases to 8, we retransmit 5 times and after 8 seconds delayed
-	 * answer arrives rto becomes 120 seconds! If at least one of segments
-	 * in window is lost... Voila.	 			--ANK (010210)
 	 */
-	struct tcp_sock *tp = tcp_sk(sk);
+	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 
-	tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-}
-
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
-{
-	/* We don't have a timestamp. Can only use
-	 * packets that are not retransmitted to determine
-	 * rtt estimates. Also, we must not reset the
-	 * backoff for rto until we get a non-retransmitted
-	 * packet. This allows us to deal with a situation
-	 * where the network delay has increased suddenly.
-	 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
-	 */
-
-	if (flag & FLAG_RETRANS_DATA_ACKED)
+	if (seq_rtt < 0)
 		return;
 
-	tcp_valid_rtt_meas(sk, seq_rtt);
-}
+	tcp_rtt_estimator(sk, seq_rtt);
+	tcp_set_rto(sk);
 
-static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      const s32 seq_rtt)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
-	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-		tcp_ack_saw_tstamp(sk, flag);
-	else if (seq_rtt >= 0)
-		tcp_ack_no_tstamp(sk, seq_rtt, flag);
+	/* RFC6298: only reset backoff on valid RTT measurement. */
+	inet_csk(sk)->icsk_backoff = 0;
 }
 
 /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
@@ -2989,8 +2960,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			if (sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
-			ca_seq_rtt = -1;
-			seq_rtt = -1;
 		} else {
 			ca_seq_rtt = now - scb->when;
 			last_ackt = skb->tstamp;
-- 
1.8.3

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 net-next 3/4] tcp: measure RTT from new SACK
  2013-07-22 23:20 [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling Yuchung Cheng
  2013-07-22 23:20 ` [PATCH v2 net-next 2/4] tcp: prefer packet timing to TS-ECR for RTT Yuchung Cheng
@ 2013-07-22 23:20 ` Yuchung Cheng
  2013-07-22 23:20 ` [PATCH v2 net-next 4/4] tcp: use RTT from SACK for RTO Yuchung Cheng
  2013-07-23  1:02 ` [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling David Miller
  3 siblings, 0 replies; 6+ messages in thread
From: Yuchung Cheng @ 2013-07-22 23:20 UTC (permalink / raw)
  To: davem, ncardwell, edumazet, nanditad; +Cc: netdev, Yuchung Cheng

Take RTT sample if an ACK selectively acks some sequences that
have never been retransmitted. The Karn's algorithm does not apply
even if that ACK (s)acks other retransmitted sequences, because it
must been generated by an original but perhaps out-of-order packet.
There is no ambiguity. In case when multiple blocks are newly
sacked because of ACK losses the earliest block is used to
measure RTT, similar to cummulative ACKs.

Such RTT samples allow the sender to estimate the RTO during loss
recovery and packet reordering events. It is still useful even with
TCP timestamps. That's because during these events the SND.UNA may
not advance preventing RTT samples from TS ECR (thus the FLAG_ACKED
check before calling tcp_ack_update_rtt()).  Therefore this new
RTT source is complementary to existing ACK and TS RTT mechanisms.

This patch does not update the RTO. It is done in the next patch.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
ChangeLog in v2:
	use earliest newly sacked block to measure RTT

 net/ipv4/tcp_input.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c7398f0..b85bc7c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1048,6 +1048,7 @@ struct tcp_sacktag_state {
 	int reord;
 	int fack_count;
 	int flag;
+	s32 rtt; /* RTT measured by SACKing never-retransmitted data */
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1108,7 +1109,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
-			  bool dup_sack, int pcount)
+			  int dup_sack, int pcount, u32 xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1148,6 +1149,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
 							   state->reord);
 				if (!after(end_seq, tp->high_seq))
 					state->flag |= FLAG_ORIG_SACK_ACKED;
+				/* Pick the earliest sequence sacked for RTT */
+				if (state->rtt < 0)
+					state->rtt = tcp_time_stamp - xmit_time;
 			}
 
 			if (sacked & TCPCB_LOST) {
@@ -1205,7 +1209,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 * tcp_highest_sack_seq() when skb is highest_sack.
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-			start_seq, end_seq, dup_sack, pcount);
+			start_seq, end_seq, dup_sack, pcount,
+			TCP_SKB_CB(skb)->when);
 
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
@@ -1479,7 +1484,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->seq,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
-						tcp_skb_pcount(skb));
+						tcp_skb_pcount(skb),
+						TCP_SKB_CB(skb)->when);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1536,7 +1542,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-			u32 prior_snd_una)
+			u32 prior_snd_una, s32 *sack_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1554,6 +1560,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 	state.flag = 0;
 	state.reord = tp->packets_out;
+	state.rtt = -1;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1737,6 +1744,7 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
+	*sack_rtt = state.rtt;
 	return state.flag;
 }
 
@@ -3254,6 +3262,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
+	s32 sack_rtt = -1;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3310,7 +3319,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
 		if (TCP_SKB_CB(skb)->sacked)
-			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+							&sack_rtt);
 
 		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 			flag |= FLAG_ECE;
@@ -3382,7 +3392,8 @@ old_ack:
 	 * If data was DSACKed, see if we can undo a cwnd reduction.
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
-		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_rtt);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
 	}
-- 
1.8.3

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 net-next 4/4] tcp: use RTT from SACK for RTO
  2013-07-22 23:20 [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling Yuchung Cheng
  2013-07-22 23:20 ` [PATCH v2 net-next 2/4] tcp: prefer packet timing to TS-ECR for RTT Yuchung Cheng
  2013-07-22 23:20 ` [PATCH v2 net-next 3/4] tcp: measure RTT from new SACK Yuchung Cheng
@ 2013-07-22 23:20 ` Yuchung Cheng
  2013-07-23  1:02 ` [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling David Miller
  3 siblings, 0 replies; 6+ messages in thread
From: Yuchung Cheng @ 2013-07-22 23:20 UTC (permalink / raw)
  To: davem, ncardwell, edumazet, nanditad; +Cc: netdev, Yuchung Cheng

If RTT is not available because Karn's check has failed or no
new packet is acked, use the RTT measured from SACK to estimate
the RTO. The sender can continue to estimate the RTO during loss
recovery or reordering event upon receiving non-partial ACKs.

This also changes when the RTO is re-armed. Previously it is
only re-armed when some data is cummulatively acknowledged (i.e.,
SND.UNA advances), but now it is re-armed whenever RTT estimator
is updated. This feature is particularly useful to reduce spurious
timeout for buffer bloat including cellular carriers [1], and
RTT estimation on reordering events.

[1] "An In-depth Study of LTE: Effect of Network Protocol and
 Application Behavior on Performance", In Proc. of SIGCOMM 2013

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
---
 net/ipv4/tcp_input.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b85bc7c..b61274b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2800,8 +2800,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 	tcp_xmit_retransmit_queue(sk);
 }
 
-static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      s32 seq_rtt)
+static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
+				      s32 seq_rtt, s32 sack_rtt)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2813,6 +2813,9 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 	if (flag & FLAG_RETRANS_DATA_ACKED)
 		seq_rtt = -1;
 
+	if (seq_rtt < 0)
+		seq_rtt = sack_rtt;
+
 	/* RTTM Rule: A TSecr value received in a segment is used to
 	 * update the averaged RTT measurement only if the segment
 	 * acknowledges some new data, i.e., only if it advances the
@@ -2823,13 +2826,14 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 
 	if (seq_rtt < 0)
-		return;
+		return false;
 
 	tcp_rtt_estimator(sk, seq_rtt);
 	tcp_set_rto(sk);
 
 	/* RFC6298: only reset backoff on valid RTT measurement. */
 	inet_csk(sk)->icsk_backoff = 0;
+	return true;
 }
 
 /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
@@ -2840,7 +2844,7 @@ static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
 
 	if (tp->lsndtime && !tp->total_retrans)
 		seq_rtt = tcp_time_stamp - tp->lsndtime;
-	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt);
+	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
 }
 
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
@@ -2929,7 +2933,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una)
+			       u32 prior_snd_una, s32 sack_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3019,6 +3023,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
+	if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
+	    (flag & FLAG_ACKED))
+		tcp_rearm_rto(sk);
+
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
@@ -3028,9 +3036,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			tcp_mtup_probe_success(sk);
 		}
 
-		tcp_ack_update_rtt(sk, flag, seq_rtt);
-		tcp_rearm_rto(sk);
-
 		if (tcp_is_reno(tp)) {
 			tcp_remove_reno_sacks(sk, pkts_acked);
 		} else {
@@ -3339,7 +3344,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* See if we can take anything off of the retransmit queue. */
 	acked = tp->packets_out;
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
 	acked -= tp->packets_out;
 
 	if (tcp_ack_is_dubious(sk, flag)) {
-- 
1.8.3

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling
  2013-07-22 23:20 [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling Yuchung Cheng
                   ` (2 preceding siblings ...)
  2013-07-22 23:20 ` [PATCH v2 net-next 4/4] tcp: use RTT from SACK for RTO Yuchung Cheng
@ 2013-07-23  1:02 ` David Miller
  2013-07-23 15:54   ` Yuchung Cheng
  3 siblings, 1 reply; 6+ messages in thread
From: David Miller @ 2013-07-23  1:02 UTC (permalink / raw)
  To: ycheng; +Cc: ncardwell, edumazet, nanditad, netdev

From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 22 Jul 2013 16:20:45 -0700

> This patch series improve RTT sampling in three ways:
> 1. Sample RTT during fast recovery and reordering events.
> 2. Favor ack-based RTT to timestamps because of broken TS ECR fields
> 3. Consolidate the RTT measurement logic.
> 
> The first patch consolidates SYNACK and other RTT measurement to use a
> central function tcp_ack_update_rtt(). A (small) bonus is now SYNACK
> RTT measurement happens after PAWS check, potentially reducing the
> impact of RTO seeding on bad TCP timestamps values.
> 
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

I like this series a lot, applied, thanks!

Next time please submit a seperate "[PATCH 0/4] xxx" posting.

If it has non-trivial content, I use the "0" posting as the
commit message of a merge commit when I apply your series.

What I did here was take the text:

====================
This patch series improve RTT sampling in three ways:
1. Sample RTT during fast recovery and reordering events.
2. Favor ack-based RTT to timestamps because of broken TS ECR fields
3. Consolidate the RTT measurement logic.
====================

and place it into that merge commit, removing it from the patch #1
commit message.

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling
  2013-07-23  1:02 ` [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling David Miller
@ 2013-07-23 15:54   ` Yuchung Cheng
  0 siblings, 0 replies; 6+ messages in thread
From: Yuchung Cheng @ 2013-07-23 15:54 UTC (permalink / raw)
  To: David Miller; +Cc: Neal Cardwell, Eric Dumazet, Nandita Dukkipati, netdev

On Mon, Jul 22, 2013 at 6:02 PM, David Miller <davem@davemloft.net> wrote:
> From: Yuchung Cheng <ycheng@google.com>
> Date: Mon, 22 Jul 2013 16:20:45 -0700
>
>> This patch series improve RTT sampling in three ways:
>> 1. Sample RTT during fast recovery and reordering events.
>> 2. Favor ack-based RTT to timestamps because of broken TS ECR fields
>> 3. Consolidate the RTT measurement logic.
>>
>> The first patch consolidates SYNACK and other RTT measurement to use a
>> central function tcp_ack_update_rtt(). A (small) bonus is now SYNACK
>> RTT measurement happens after PAWS check, potentially reducing the
>> impact of RTO seeding on bad TCP timestamps values.
>>
>> Signed-off-by: Yuchung Cheng <ycheng@google.com>
>
> I like this series a lot, applied, thanks!
>
> Next time please submit a seperate "[PATCH 0/4] xxx" posting.
>
> If it has non-trivial content, I use the "0" posting as the
> commit message of a merge commit when I apply your series.
>
> What I did here was take the text:
>
> ====================
> This patch series improve RTT sampling in three ways:
> 1. Sample RTT during fast recovery and reordering events.
> 2. Favor ack-based RTT to timestamps because of broken TS ECR fields
> 3. Consolidate the RTT measurement logic.
> ====================
>
> and place it into that merge commit, removing it from the patch #1
> commit message.
Thank you David. Sorry for the extra hassle. Will definitely do that
for next patch series.

>
> Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-07-23 15:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-22 23:20 [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling Yuchung Cheng
2013-07-22 23:20 ` [PATCH v2 net-next 2/4] tcp: prefer packet timing to TS-ECR for RTT Yuchung Cheng
2013-07-22 23:20 ` [PATCH v2 net-next 3/4] tcp: measure RTT from new SACK Yuchung Cheng
2013-07-22 23:20 ` [PATCH v2 net-next 4/4] tcp: use RTT from SACK for RTO Yuchung Cheng
2013-07-23  1:02 ` [PATCH v2 net-next 1/4] tcp: consolidate SYNACK RTT sampling David Miller
2013-07-23 15:54   ` Yuchung Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).