netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yuchung Cheng <ycheng@google.com>
To: davem@davemloft.net, ncardwell@google.com, edumazet@google.com
Cc: nanditad@google.com, netdev@vger.kernel.org,
	Yuchung Cheng <ycheng@google.com>
Subject: [PATCH 3/4 net-next] tcp: measure RTT from new SACK
Date: Sat, 20 Jul 2013 22:39:29 -0700	[thread overview]
Message-ID: <1374385170-358-3-git-send-email-ycheng@google.com> (raw)
In-Reply-To: <1374385170-358-1-git-send-email-ycheng@google.com>

Take an RTT sample if an ACK selectively acks some sequences that
have never been retransmitted. Karn's algorithm does not apply
even if that ACK (s)acks other retransmitted sequences, because it
must have been generated by an original but perhaps out-of-order packet.
There is no ambiguity.

Such RTT samples allow the sender to estimate the RTO during loss
recovery and packet reordering events. It is still useful even with
TCP timestamps. That's because during these events SND.UNA may
not advance, preventing RTT samples from TS ECR (thus the FLAG_ACKED
check before calling tcp_ack_update_rtt()).  Therefore this new
RTT source is complementary to existing ACK and TS RTT mechanisms.

This patch does not update the RTO. It is done in the next patch.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
 net/ipv4/tcp_input.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b6357ec..46aeef1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1048,6 +1048,7 @@ struct tcp_sacktag_state {
 	int reord;
 	int fack_count;
 	int flag;
+	s32 rtt; /* RTT measured by SACKing never-retransmitted data */
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1108,7 +1109,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
-			  bool dup_sack, int pcount)
+			  int dup_sack, int pcount, u32 lsndtime)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1148,6 +1149,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
 							   state->reord);
 				if (!after(end_seq, tp->high_seq))
 					state->flag |= FLAG_ORIG_SACK_ACKED;
+				state->rtt = tcp_time_stamp - lsndtime;
 			}
 
 			if (sacked & TCPCB_LOST) {
@@ -1205,7 +1207,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 * tcp_highest_sack_seq() when skb is highest_sack.
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-			start_seq, end_seq, dup_sack, pcount);
+			start_seq, end_seq, dup_sack, pcount,
+			TCP_SKB_CB(skb)->when);
 
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
@@ -1479,7 +1482,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->seq,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
-						tcp_skb_pcount(skb));
+						tcp_skb_pcount(skb),
+						TCP_SKB_CB(skb)->when);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1536,7 +1540,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-			u32 prior_snd_una)
+			u32 prior_snd_una, s32 *sack_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1554,6 +1558,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 	state.flag = 0;
 	state.reord = tp->packets_out;
+	state.rtt = -1;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1737,6 +1742,7 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
+	*sack_rtt = state.rtt;
 	return state.flag;
 }
 
@@ -3254,6 +3260,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
+	s32 sack_rtt = -1;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3310,7 +3317,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
 		if (TCP_SKB_CB(skb)->sacked)
-			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+							&sack_rtt);
 
 		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 			flag |= FLAG_ECE;
@@ -3382,7 +3390,8 @@ old_ack:
 	 * If data was DSACKed, see if we can undo a cwnd reduction.
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
-		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_rtt);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
 	}
-- 
1.8.3

  parent reply	other threads:[~2013-07-21  5:39 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-21  5:39 [PATCH 1/4 net-next] tcp: consolidate SYNACK RTT sampling Yuchung Cheng
2013-07-21  5:39 ` [PATCH 2/4 net-next] tcp: prefer packet timing to TS-ECR for RTT Yuchung Cheng
2013-07-22  3:17   ` Neal Cardwell
2013-07-21  5:39 ` Yuchung Cheng [this message]
2013-07-22  3:32   ` [PATCH 3/4 net-next] tcp: measure RTT from new SACK Neal Cardwell
2013-07-21  5:39 ` [PATCH 4/4 net-next] tcp: use RTT from SACK for RTO Yuchung Cheng
2013-07-22  3:40   ` Neal Cardwell
2013-07-22  2:45 ` [PATCH 1/4 net-next] tcp: consolidate SYNACK RTT sampling Neal Cardwell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1374385170-358-3-git-send-email-ycheng@google.com \
    --to=ycheng@google.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=nanditad@google.com \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --subject='Re: [PATCH 3/4 net-next] tcp: measure RTT from new SACK' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).