All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
@ 2021-09-23 21:17 Luke Hsiao
  2021-09-24 13:30 ` patchwork-bot+netdevbpf
  2021-09-24 20:20 ` Jakub Kicinski
  0 siblings, 2 replies; 6+ messages in thread
From: Luke Hsiao @ 2021-09-23 21:17 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Yuchung Cheng, Lawrence Brakmo, Neal Cardwell,
	Eric Dumazet, Luke Hsiao

From: Yuchung Cheng <ycheng@google.com>

In order to track CE marks per rate sample (one round trip), TCP needs a
per-skb header field to record the tp->delivered_ce count when the skb
was sent. To make space, we replace the "last_in_flight" field which is
used exclusively for NV congestion control. The stat needed by NV can be
alternatively approximated by existing stats tcp_sock delivered and
mss_cache.

This patch counts the number of packets delivered which have CE marks in
the rate sample, using similar approach of delivery accounting.

Cc: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Luke Hsiao <lukehsiao@google.com>
---
 include/net/tcp.h     |  9 ++++++---
 net/ipv4/tcp_input.c  | 11 +++++------
 net/ipv4/tcp_output.c |  2 --
 net/ipv4/tcp_rate.c   |  6 ++++++
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 673c3b01e287..32cf6c01f403 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -874,10 +874,11 @@ struct tcp_skb_cb {
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
 	union {
 		struct {
+#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
 			/* There is space for up to 24 bytes */
-			__u32 in_flight:30,/* Bytes in flight at transmit */
-			      is_app_limited:1, /* cwnd not fully used? */
-			      unused:1;
+			__u32 is_app_limited:1, /* cwnd not fully used? */
+			      delivered_ce:20,
+			      unused:11;
 			/* pkts S/ACKed so far upon tx of skb, incl retrans: */
 			__u32 delivered;
 			/* start of send pipeline phase */
@@ -1029,7 +1030,9 @@ struct ack_sample {
 struct rate_sample {
 	u64  prior_mstamp; /* starting timestamp for interval */
 	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
+	u32  prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
 	s32  delivered;		/* number of packets delivered over interval */
+	s32  delivered_ce;	/* number of packets delivered w/ CE marks*/
 	long interval_us;	/* time for tp->delivered to incr "delivered" */
 	u32 snd_interval_us;	/* snd interval for delivered packets */
 	u32 rcv_interval_us;	/* rcv interval for delivered packets */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 141e85e6422b..53675e284841 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3221,7 +3221,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	long seq_rtt_us = -1L;
 	long ca_rtt_us = -1L;
 	u32 pkts_acked = 0;
-	u32 last_in_flight = 0;
 	bool rtt_update;
 	int flag = 0;
 
@@ -3257,7 +3256,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			if (!first_ackt)
 				first_ackt = last_ackt;
 
-			last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
 			if (before(start_seq, reord))
 				reord = start_seq;
 			if (!after(scb->end_seq, tp->high_seq))
@@ -3323,8 +3321,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
 		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
 
-		if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
-		    last_in_flight && !prior_sacked && fully_acked &&
+		if (pkts_acked == 1 && fully_acked && !prior_sacked &&
+		    (tp->snd_una - prior_snd_una) < tp->mss_cache &&
 		    sack->rate->prior_delivered + 1 == tp->delivered &&
 		    !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
 			/* Conservatively mark a delayed ACK. It's typically
@@ -3381,9 +3379,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 	if (icsk->icsk_ca_ops->pkts_acked) {
 		struct ack_sample sample = { .pkts_acked = pkts_acked,
-					     .rtt_us = sack->rate->rtt_us,
-					     .in_flight = last_in_flight };
+					     .rtt_us = sack->rate->rtt_us };
 
+		sample.in_flight = tp->mss_cache *
+			(tp->delivered - sack->rate->prior_delivered);
 		icsk->icsk_ca_ops->pkts_acked(sk, &sample);
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6d72f3ea48c4..fdc39b4fbbfa 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1256,8 +1256,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
 	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	if (clone_it) {
-		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
-			- tp->snd_una;
 		oskb = skb;
 
 		tcp_skb_tsorted_save(oskb) {
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 0de693565963..fbab921670cc 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
 	TCP_SKB_CB(skb)->tx.first_tx_mstamp	= tp->first_tx_mstamp;
 	TCP_SKB_CB(skb)->tx.delivered_mstamp	= tp->delivered_mstamp;
 	TCP_SKB_CB(skb)->tx.delivered		= tp->delivered;
+	TCP_SKB_CB(skb)->tx.delivered_ce	= tp->delivered_ce;
 	TCP_SKB_CB(skb)->tx.is_app_limited	= tp->app_limited ? 1 : 0;
 }
 
@@ -86,6 +87,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 
 	if (!rs->prior_delivered ||
 	    after(scb->tx.delivered, rs->prior_delivered)) {
+		rs->prior_delivered_ce  = scb->tx.delivered_ce;
 		rs->prior_delivered  = scb->tx.delivered;
 		rs->prior_mstamp     = scb->tx.delivered_mstamp;
 		rs->is_app_limited   = scb->tx.is_app_limited;
@@ -138,6 +140,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 	}
 	rs->delivered   = tp->delivered - rs->prior_delivered;
 
+	rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
+	/* delivered_ce occupies less than 32 bits in the skb control block */
+	rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
+
 	/* Model sending data and receiving ACKs as separate pipeline phases
 	 * for a window. Usually the ACK phase is longer, but with ACK
 	 * compression the send phase can be longer. To be safe we use the
-- 
2.33.0.685.g46640cef36-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
  2021-09-23 21:17 [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample Luke Hsiao
@ 2021-09-24 13:30 ` patchwork-bot+netdevbpf
  2021-09-24 20:20 ` Jakub Kicinski
  1 sibling, 0 replies; 6+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-09-24 13:30 UTC (permalink / raw)
  To: Luke Hsiao; +Cc: davem, netdev, ycheng, brakmo, ncardwell, edumazet, lukehsiao

Hello:

This patch was applied to netdev/net-next.git (refs/heads/master):

On Thu, 23 Sep 2021 21:17:07 +0000 you wrote:
> From: Yuchung Cheng <ycheng@google.com>
> 
> In order to track CE marks per rate sample (one round trip), TCP needs a
> per-skb header field to record the tp->delivered_ce count when the skb
> was sent. To make space, we replace the "last_in_flight" field which is
> used exclusively for NV congestion control. The stat needed by NV can be
> alternatively approximated by existing stats tcp_sock delivered and
> mss_cache.
> 
> [...]

Here is the summary with links:
  - [net-next] tcp: tracking packets with CE marks in BW rate sample
    https://git.kernel.org/netdev/net-next/c/40bc6063796e

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
  2021-09-23 21:17 [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample Luke Hsiao
  2021-09-24 13:30 ` patchwork-bot+netdevbpf
@ 2021-09-24 20:20 ` Jakub Kicinski
  2021-09-24 23:30   ` Yuchung Cheng
  1 sibling, 1 reply; 6+ messages in thread
From: Jakub Kicinski @ 2021-09-24 20:20 UTC (permalink / raw)
  To: Luke Hsiao
  Cc: David Miller, netdev, Yuchung Cheng, Lawrence Brakmo,
	Neal Cardwell, Eric Dumazet, Luke Hsiao

On Thu, 23 Sep 2021 21:17:07 +0000 Luke Hsiao wrote:
> From: Yuchung Cheng <ycheng@google.com>
> 
> In order to track CE marks per rate sample (one round trip), TCP needs a
> per-skb header field to record the tp->delivered_ce count when the skb
> was sent. To make space, we replace the "last_in_flight" field which is
> used exclusively for NV congestion control. The stat needed by NV can be
> alternatively approximated by existing stats tcp_sock delivered and
> mss_cache.
> 
> This patch counts the number of packets delivered which have CE marks in
> the rate sample, using similar approach of delivery accounting.

Is this expected to be used from BPF CC? I don't see a user..

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
  2021-09-24 20:20 ` Jakub Kicinski
@ 2021-09-24 23:30   ` Yuchung Cheng
  2021-09-24 23:42     ` Jakub Kicinski
  2021-09-26 22:31     ` Dave Taht
  0 siblings, 2 replies; 6+ messages in thread
From: Yuchung Cheng @ 2021-09-24 23:30 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Luke Hsiao, David Miller, netdev, Lawrence Brakmo, Neal Cardwell,
	Eric Dumazet, Luke Hsiao

On Fri, Sep 24, 2021 at 1:20 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu, 23 Sep 2021 21:17:07 +0000 Luke Hsiao wrote:
> > From: Yuchung Cheng <ycheng@google.com>
> >
> > In order to track CE marks per rate sample (one round trip), TCP needs a
> > per-skb header field to record the tp->delivered_ce count when the skb
> > was sent. To make space, we replace the "last_in_flight" field which is
> > used exclusively for NV congestion control. The stat needed by NV can be
> > alternatively approximated by existing stats tcp_sock delivered and
> > mss_cache.
> >
> > This patch counts the number of packets delivered which have CE marks in
> > the rate sample, using similar approach of delivery accounting.
>
> Is this expected to be used from BPF CC? I don't see a user..
Great question. Yes the commit message could be more clear that this
intends for both ebpf-CC or other third party module that use ECN. For
example bbr2 uses it heavily (bbr2 upstream WIP). This feature is
useful for congestion control research which many use ECN as core
signals now.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
  2021-09-24 23:30   ` Yuchung Cheng
@ 2021-09-24 23:42     ` Jakub Kicinski
  2021-09-26 22:31     ` Dave Taht
  1 sibling, 0 replies; 6+ messages in thread
From: Jakub Kicinski @ 2021-09-24 23:42 UTC (permalink / raw)
  To: Yuchung Cheng
  Cc: Luke Hsiao, David Miller, netdev, Lawrence Brakmo, Neal Cardwell,
	Eric Dumazet, Luke Hsiao

On Fri, 24 Sep 2021 16:30:28 -0700 Yuchung Cheng wrote:
> On Fri, Sep 24, 2021 at 1:20 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > On Thu, 23 Sep 2021 21:17:07 +0000 Luke Hsiao wrote:  
> > > From: Yuchung Cheng <ycheng@google.com>
> > >
> > > In order to track CE marks per rate sample (one round trip), TCP needs a
> > > per-skb header field to record the tp->delivered_ce count when the skb
> > > was sent. To make space, we replace the "last_in_flight" field which is
> > > used exclusively for NV congestion control. The stat needed by NV can be
> > > alternatively approximated by existing stats tcp_sock delivered and
> > > mss_cache.
> > >
> > > This patch counts the number of packets delivered which have CE marks in
> > > the rate sample, using similar approach of delivery accounting.  
> >
> > Is this expected to be used from BPF CC? I don't see a user..  
> Great question. Yes the commit message could be more clear that this
> intends for both ebpf-CC or other third party module that use ECN. For
> example bbr2 uses it heavily (bbr2 upstream WIP). This feature is
> useful for congestion control research which many use ECN as core
> signals now.

Interesting, thanks for explaining! :)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
  2021-09-24 23:30   ` Yuchung Cheng
  2021-09-24 23:42     ` Jakub Kicinski
@ 2021-09-26 22:31     ` Dave Taht
  1 sibling, 0 replies; 6+ messages in thread
From: Dave Taht @ 2021-09-26 22:31 UTC (permalink / raw)
  To: Yuchung Cheng
  Cc: Jakub Kicinski, Luke Hsiao, David Miller,
	Linux Kernel Network Developers, Lawrence Brakmo, Neal Cardwell,
	Eric Dumazet, Luke Hsiao

On Fri, Sep 24, 2021 at 4:43 PM Yuchung Cheng <ycheng@google.com> wrote:
>
> On Fri, Sep 24, 2021 at 1:20 PM Jakub Kicinski <kuba@kernel.org> wrote:
> >
> > On Thu, 23 Sep 2021 21:17:07 +0000 Luke Hsiao wrote:
> > > From: Yuchung Cheng <ycheng@google.com>
> > >
> > > In order to track CE marks per rate sample (one round trip), TCP needs a
> > > per-skb header field to record the tp->delivered_ce count when the skb
> > > was sent. To make space, we replace the "last_in_flight" field which is
> > > used exclusively for NV congestion control. The stat needed by NV can be
> > > alternatively approximated by existing stats tcp_sock delivered and
> > > mss_cache.
> > >
> > > This patch counts the number of packets delivered which have CE marks in
> > > the rate sample, using similar approach of delivery accounting.
> >
> > Is this expected to be used from BPF CC? I don't see a user..
> Great question. Yes the commit message could be more clear that this
> intends for both ebpf-CC or other third party module that use ECN. For
> example bbr2 uses it heavily (bbr2 upstream WIP). This feature is
> useful for congestion control research which many use ECN as core
> signals now.

I am glad a common API to this is emerging. RFC3168 compliant aqms
(fq_codel, pie, cake) can also emit multiple CE marks per RTT, and
the defined response to them is inadequate. Glad you found space!

There is also this ecn problem outstanding elsewhere, that I hope
bbrv2 has looked into?

https://www.bobbriscoe.net/projects/latency/sub-mss-w.pdf

Otherwise:

Reviewed-by: Dave Taht <dave.taht@gmail.com>


--
Fixing Starlink's Latencies: https://www.youtube.com/watch?v=c9gLo6Xrwgw

Dave Täht CEO, TekLibre, LLC

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-09-26 22:33 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-23 21:17 [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample Luke Hsiao
2021-09-24 13:30 ` patchwork-bot+netdevbpf
2021-09-24 20:20 ` Jakub Kicinski
2021-09-24 23:30   ` Yuchung Cheng
2021-09-24 23:42     ` Jakub Kicinski
2021-09-26 22:31     ` Dave Taht

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.