* [PATCH net-next 1/3] tcp: move struct tcp_sacktag_state to tcp_ack()
From: Kenneth Klette Jonassen @ 2015-04-30 16:23 UTC
To: netdev; +Cc: Kenneth Klette Jonassen, Yuchung Cheng, Eric Dumazet
A later patch passes two values set in tcp_sacktag_one() to
tcp_clean_rtx_queue(). Prepare for this by passing them via struct
tcp_sacktag_state.
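
In rough terms, the resulting calling convention is sketched below
(a simplified summary of the diff, not verbatim; unrelated arguments
and error paths are omitted):

	/* tcp_ack() owns the sacktag state and hands it to both consumers */
	struct tcp_sacktag_state sack_state;

	sack_state.rtt_us = -1L;	/* no SACK RTT sample yet */

	if (TCP_SKB_CB(skb)->sacked)	/* fills sack_state.flag, .reord, .rtt_us */
		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
						&sack_state);

	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
				    sack_state.rtt_us);
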
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
---
net/ipv4/tcp_input.c | 45 ++++++++++++++++++++++-----------------------
1 file changed, 22 insertions(+), 23 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bc790ea..9902cf1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1634,7 +1634,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
- u32 prior_snd_una, long *sack_rtt_us)
+ u32 prior_snd_una, struct tcp_sacktag_state *state)
{
struct tcp_sock *tp = tcp_sk(sk);
const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1642,7 +1642,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
struct tcp_sack_block sp[TCP_NUM_SACKS];
struct tcp_sack_block *cache;
- struct tcp_sacktag_state state;
struct sk_buff *skb;
int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
int used_sacks;
@@ -1650,9 +1649,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
int i, j;
int first_sack_index;
- state.flag = 0;
- state.reord = tp->packets_out;
- state.rtt_us = -1L;
+ state->flag = 0;
+ state->reord = tp->packets_out;
if (!tp->sacked_out) {
if (WARN_ON(tp->fackets_out))
@@ -1663,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
if (found_dup_sack)
- state.flag |= FLAG_DSACKING_ACK;
+ state->flag |= FLAG_DSACKING_ACK;
/* Eliminate too old ACKs, but take into
* account more or less fresh ones, they can
@@ -1728,7 +1726,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
skb = tcp_write_queue_head(sk);
- state.fack_count = 0;
+ state->fack_count = 0;
i = 0;
if (!tp->sacked_out) {
@@ -1762,10 +1760,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
/* Head todo? */
if (before(start_seq, cache->start_seq)) {
- skb = tcp_sacktag_skip(skb, sk, &state,
+ skb = tcp_sacktag_skip(skb, sk, state,
start_seq);
skb = tcp_sacktag_walk(skb, sk, next_dup,
- &state,
+ state,
start_seq,
cache->start_seq,
dup_sack);
@@ -1776,7 +1774,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
goto advance_sp;
skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
- &state,
+ state,
cache->end_seq);
/* ...tail remains todo... */
@@ -1785,12 +1783,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state.fack_count = tp->fackets_out;
+ state->fack_count = tp->fackets_out;
cache++;
goto walk;
}
- skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
+ skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
/* Check overlap against next cached too (past this one already) */
cache++;
continue;
@@ -1800,12 +1798,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state.fack_count = tp->fackets_out;
+ state->fack_count = tp->fackets_out;
}
- skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
+ skb = tcp_sacktag_skip(skb, sk, state, start_seq);
walk:
- skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
+ skb = tcp_sacktag_walk(skb, sk, next_dup, state,
start_seq, end_seq, dup_sack);
advance_sp:
@@ -1820,9 +1818,9 @@ advance_sp:
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- if ((state.reord < tp->fackets_out) &&
+ if ((state->reord < tp->fackets_out) &&
((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
- tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
+ tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
tcp_mark_lost_retrans(sk);
tcp_verify_left_out(tp);
@@ -1834,8 +1832,7 @@ out:
WARN_ON((int)tp->retrans_out < 0);
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
- *sack_rtt_us = state.rtt_us;
- return state.flag;
+ return state->flag;
}
/* Limits sacked_out so that sum with lost_out isn't ever larger than
@@ -3459,6 +3456,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sacktag_state sack_state;
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -3467,7 +3465,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
int prior_packets = tp->packets_out;
const int prior_unsacked = tp->packets_out - tp->sacked_out;
int acked = 0; /* Number of packets newly acked */
- long sack_rtt_us = -1L;
+
+ sack_state.rtt_us = -1L;
/* We very likely will need to access write queue head. */
prefetchw(sk->sk_write_queue.next);
@@ -3531,7 +3530,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (TCP_SKB_CB(skb)->sacked)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt_us);
+ &sack_state);
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
flag |= FLAG_ECE;
@@ -3556,7 +3555,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
acked = tp->packets_out;
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
- sack_rtt_us);
+ sack_state.rtt_us);
acked -= tp->packets_out;
/* Advance cwnd if state allows */
@@ -3608,7 +3607,7 @@ old_ack:
*/
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt_us);
+ &sack_state);
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
}
--
2.1.0
* [PATCH net-next 2/3] tcp: improve RTT from SACK for CC
From: Kenneth Klette Jonassen @ 2015-04-30 16:23 UTC
To: netdev; +Cc: Kenneth Klette Jonassen, Yuchung Cheng, Eric Dumazet
tcp_sacktag_one() always picks the earliest sequence SACKed for RTT.
This might not make sense for congestion control in cases where:
1. ACKs are lost, i.e. a SACK following a lost SACK covers both
new and old segments at the receiver.
2. The receiver disregards the RFC 5681 recommendation to immediately
ACK out-of-order segments.
Give congestion control an RTT for the latest segment SACKed, which is the
most accurate RTT estimate, but preserve the conservative RTT for RTO.
This also removes the call to skb_mstamp_get() in tcp_sacktag_one().
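
For illustration (made-up numbers): if three never-retransmitted segments
sent at t = 0, 10 and 20 ms are all covered by a single SACK arriving at
t = 120 ms because the two earlier SACKs were lost, first_sackt yields a
120 ms sample and last_sackt a 100 ms sample. RTO keeps the conservative
120 ms, while congestion control gets the 100 ms measured against the most
recently sent segment.
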
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
---
net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++------------------
1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9902cf1..32bac6a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1130,7 +1130,14 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sacktag_state {
int reord;
int fack_count;
- long rtt_us; /* RTT measured by SACKing never-retransmitted data */
+ /* Timestamps for earliest and latest never-retransmitted segment
+ * that was SACKed. RTO needs the earliest RTT to be conservative
+ * against receivers that might delay SACKing (RFC 5681 does not
+ * require ACKing out-of-order segments immediately), but congestion
+ * control should still get an accurate delay signal.
+ */
+ struct skb_mstamp first_sackt;
+ struct skb_mstamp last_sackt;
int flag;
};
@@ -1233,14 +1240,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
state->reord);
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
- /* Pick the earliest sequence sacked for RTT */
- if (state->rtt_us < 0) {
- struct skb_mstamp now;
-
- skb_mstamp_get(&now);
- state->rtt_us = skb_mstamp_us_delta(&now,
- xmit_time);
- }
+ if (state->first_sackt.v64 == 0)
+ state->first_sackt = *xmit_time;
+ state->last_sackt = *xmit_time;
}
if (sacked & TCPCB_LOST) {
@@ -3049,7 +3051,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
* arrived at the other end.
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, long sack_rtt_us)
+ u32 prior_snd_una,
+ struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct skb_mstamp first_ackt, last_ackt, now;
@@ -3057,8 +3060,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
- long ca_seq_rtt_us = -1L;
+ long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
+ long ca_rtt_us = -1L;
struct sk_buff *skb;
u32 pkts_acked = 0;
bool rtt_update;
@@ -3147,7 +3151,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
skb_mstamp_get(&now);
if (likely(first_ackt.v64)) {
seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
- ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ }
+ if (sack->first_sackt.v64) {
+ sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
+ ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
}
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
@@ -3178,10 +3186,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- if (ca_ops->pkts_acked) {
- long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
- ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
- }
+ if (ca_ops->pkts_acked)
+ ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
@@ -3466,7 +3472,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
const int prior_unsacked = tp->packets_out - tp->sacked_out;
int acked = 0; /* Number of packets newly acked */
- sack_state.rtt_us = -1L;
+ sack_state.first_sackt.v64 = 0;
/* We very likely will need to access write queue head. */
prefetchw(sk->sk_write_queue.next);
@@ -3555,7 +3561,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
acked = tp->packets_out;
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
- sack_state.rtt_us);
+ &sack_state);
acked -= tp->packets_out;
/* Advance cwnd if state allows */
--
2.1.0
* [PATCH net-next 3/3] tcp: invoke pkts_acked hook on every ACK
From: Kenneth Klette Jonassen @ 2015-04-30 16:23 UTC
To: netdev; +Cc: Kenneth Klette Jonassen, Yuchung Cheng, Eric Dumazet
Invoking pkts_acked is currently conditioned on FLAG_ACKED: receiving a
cumulative ACK of new data, or an ACK with the SYN flag set.
Remove this condition so that CC may get RTT measurements from all SACKs.
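
For CC module authors, the relaxed contract looks roughly like the
hypothetical hook below (example_ca and its fields are invented for
illustration; only the hook signature and inet_csk_ca() are existing
kernel API). pkts_acked may now run on SACK-only ACKs, i.e. with a zero
acked count and without snd_una having advanced:

	struct example_ca {			/* hypothetical per-socket CC state */
		u32 acked;
		s32 last_rtt_us;
	};

	static void example_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
	{
		struct example_ca *ca = inet_csk_ca(sk);

		if (rtt_us > 0)			/* RTT sample may come from a SACK alone */
			ca->last_rtt_us = rtt_us;

		if (num_acked == 0)		/* SACK-only ACK: no cumulative progress */
			return;

		ca->acked += num_acked;		/* only advance counters on real progress */
	}
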
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
---
net/ipv4/tcp_input.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 32bac6a..e5089c5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3161,9 +3161,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
if (flag & FLAG_ACKED) {
- const struct tcp_congestion_ops *ca_ops
- = inet_csk(sk)->icsk_ca_ops;
-
tcp_rearm_rto(sk);
if (unlikely(icsk->icsk_mtup.probe_size &&
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
@@ -3186,9 +3183,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- if (ca_ops->pkts_acked)
- ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
-
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3198,6 +3192,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tcp_rearm_rto(sk);
}
+ if (icsk->icsk_ca_ops->pkts_acked)
+ icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
+
#if FASTRETRANS_DEBUG > 0
WARN_ON((int)tp->sacked_out < 0);
WARN_ON((int)tp->lost_out < 0);
--
2.1.0
* Re: [PATCH net-next 3/3] tcp: invoke pkts_acked hook on every ACK
From: Yuchung Cheng @ 2015-04-30 17:25 UTC
To: Kenneth Klette Jonassen; +Cc: netdev, Eric Dumazet, Neal Cardwell
On Thu, Apr 30, 2015 at 9:23 AM, Kenneth Klette Jonassen
<kennetkl@ifi.uio.no> wrote:
>
> Invoking pkts_acked is currently conditioned on FLAG_ACKED: receiving a
> cumulative ACK of new data, or an ACK with the SYN flag set.
>
> Remove this condition so that CC may get RTT measurements from all SACKs.
>
> Cc: Yuchung Cheng <ycheng@google.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
> ---
> net/ipv4/tcp_input.c | 9 +++------
> 1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 32bac6a..e5089c5 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3161,9 +3161,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
> rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
>
> if (flag & FLAG_ACKED) {
> - const struct tcp_congestion_ops *ca_ops
> - = inet_csk(sk)->icsk_ca_ops;
> -
> tcp_rearm_rto(sk);
> if (unlikely(icsk->icsk_mtup.probe_size &&
> !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
> @@ -3186,9 +3183,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
>
> tp->fackets_out -= min(pkts_acked, tp->fackets_out);
>
> - if (ca_ops->pkts_acked)
> - ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
> -
> } else if (skb && rtt_update && sack_rtt_us >= 0 &&
> sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
> /* Do not re-arm RTO if the sack RTT is measured from data sent
> @@ -3198,6 +3192,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
> tcp_rearm_rto(sk);
> }
>
> + if (icsk->icsk_ca_ops->pkts_acked)
> + icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
> +
There might be congestion controls that assume pkts_acked > 0 or that data
is cumulatively acked when this is called. Did you audit that?
* Re: [PATCH net-next 1/3] tcp: move struct tcp_sacktag_state to tcp_ack()
From: Yuchung Cheng @ 2015-04-30 17:26 UTC
To: Kenneth Klette Jonassen; +Cc: netdev, Eric Dumazet
On Thu, Apr 30, 2015 at 9:23 AM, Kenneth Klette Jonassen
<kennetkl@ifi.uio.no> wrote:
> A later patch passes two values set in tcp_sacktag_one() to
> tcp_clean_rtx_queue(). Prepare for this by passing them via struct
> tcp_sacktag_state.
>
> Cc: Yuchung Cheng <ycheng@google.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
Acked-by: Yuchung Cheng <ycheng@google.com>
* Re: [PATCH net-next 2/3] tcp: improve RTT from SACK for CC
From: Yuchung Cheng @ 2015-04-30 17:35 UTC
To: Kenneth Klette Jonassen; +Cc: netdev, Eric Dumazet
On Thu, Apr 30, 2015 at 9:23 AM, Kenneth Klette Jonassen
<kennetkl@ifi.uio.no> wrote:
> tcp_sacktag_one() always picks the earliest sequence SACKed for RTT.
> This might not make sense for congestion control in cases where:
>
> 1. ACKs are lost, i.e. a SACK following a lost SACK covers both
> new and old segments at the receiver.
> 2. The receiver disregards the RFC 5681 recommendation to immediately
> ACK out-of-order segments.
>
> Give congestion control an RTT for the latest segment SACKed, which is the
> most accurate RTT estimate, but preserve the conservative RTT for RTO.
>
> This also removes the call to skb_mstamp_get() in tcp_sacktag_one().
>
> Cc: Yuchung Cheng <ycheng@google.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
> ---
> net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++------------------
> 1 file changed, 24 insertions(+), 18 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 9902cf1..32bac6a 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -1130,7 +1130,14 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
> struct tcp_sacktag_state {
> int reord;
> int fack_count;
> - long rtt_us; /* RTT measured by SACKing never-retransmitted data */
> + /* Timestamps for earliest and latest never-retransmitted segment
> + * that was SACKed. RTO needs the earliest RTT to be conservative
> + * against receivers that might delay SACKing (RFC 5681 does not
> + * require ACKing out-of-order segments immediately), but congestion
> + * control should still get an accurate delay signal.
> + */
FWIW this comment is a little misleading. The reason we took the earliest
timestamp for sack RTT was not b/c RFC 5681 uses SHOULD on immediate ACKs.
The reason was to stay conservative against ACK compression effects. AFAIK
major stacks (almost?) always send immediate ACKs on out-of-order packets.
The rest of the patch looks good!
* Re: [PATCH net-next 3/3] tcp: invoke pkts_acked hook on every ACK
From: Kenneth Klette Jonassen @ 2015-04-30 22:20 UTC
To: Yuchung Cheng; +Cc: netdev, Eric Dumazet, Neal Cardwell
> There might be congestion controls that assume pkts_acked > 0 or that data
> is cumulatively acked when this is called. Did you audit that?
HTCP, Illinois, YeAH:
These modules save the pkts_acked count internally and use it to grow
snd_cwnd_cnt in cong_avoid(). This patch ensures they grow by the same
count that is passed to cong_avoid().

BIC:
Works well with pkts_acked = 0.

These modules have pkts_acked functions for RTT, but do not use the
acked count: Cubic, LP, Vegas, Veno, Westwood.

PS on BIC:
Impossibly high counts can potentially wrap ca->delayed_ack around to 0
and cause a division by zero, e.g. (u32) -30.
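
A rough userspace illustration of that last point (plain C; the
accumulator pattern and the initial value are assumptions written from
memory, not quoted from tcp_bic.c):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t delayed_ack = 2 << 4;		/* assumed initial estimate (32) */
		uint32_t bogus_cnt = (uint32_t)-30;	/* an "impossibly high" acked count */

		/* accumulator pattern: delayed_ack += cnt - (delayed_ack >> 4) */
		delayed_ack += bogus_cnt - (delayed_ack >> 4);

		/* 32 + (4294967266 - 2) == 2^32, which wraps to 0 in u32 arithmetic */
		printf("delayed_ack after bogus update: %u\n", (unsigned)delayed_ack);

		/* any later division by delayed_ack now divides by zero */
		return 0;
	}
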