* [PATCH net-next 0/2] tcp_bbr: more GSO work
@ 2018-02-28 22:40 Eric Dumazet
  2018-02-28 22:40 ` [PATCH net-next 1/2] tcp_bbr: better deal with suboptimal GSO (II) Eric Dumazet
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Eric Dumazet @ 2018-02-28 22:40 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Neal Cardwell, Yuchung Cheng, Soheil Hassas Yeganeh,
	Eric Dumazet, Eric Dumazet

Playing with an r8152 USB 1Gbit NIC, in both USB2 and USB3 ports,
I found that BBR was performing poorly because TSO was limited to 16KB.

This patch series makes sure BBR does not underestimate the number
of packets needed to fill the pipe when a device has
suboptimal TSO limits.
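
As a rough illustration (assuming a 1448-byte MSS): at ~1 Gbit/s the pacing
rate is about 125 MB/s, so one default pacing chunk (sk_pacing_rate >> 10,
roughly 1 ms of data) is ~122 KB. Autosized against the usual 64KB limit
that is ~45 segments per skb, but against a 16KB gso_max_size only ~11, and
since bbr_target_cwnd() provisions cwnd += 3 * goal, the cwnd target ends up
roughly 100 packets lower than intended.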

Eric Dumazet (2):
  tcp_bbr: better deal with suboptimal GSO (II)
  tcp_bbr: remove bbr->tso_segs_goal

 include/net/tcp.h     |  6 ++----
 net/ipv4/tcp_bbr.c    | 33 ++++++++++++++++-----------------
 net/ipv4/tcp_output.c | 15 ++++++++-------
 3 files changed, 26 insertions(+), 28 deletions(-)

-- 
2.16.2.395.g2e18187dfd-goog


* [PATCH net-next 1/2] tcp_bbr: better deal with suboptimal GSO (II)
  2018-02-28 22:40 [PATCH net-next 0/2] tcp_bbr: more GSO work Eric Dumazet
@ 2018-02-28 22:40 ` Eric Dumazet
  2018-03-01  3:15   ` Neal Cardwell
  2018-02-28 22:40 ` [PATCH net-next 2/2] tcp_bbr: remove bbr->tso_segs_goal Eric Dumazet
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2018-02-28 22:40 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Neal Cardwell, Yuchung Cheng, Soheil Hassas Yeganeh,
	Eric Dumazet, Eric Dumazet

This is the second part of dealing with suboptimal device GSO parameters.
In the first patch (350c9f484bde "tcp_bbr: better deal with suboptimal GSO")
we dealt with devices having a low gso_max_segs.

Some devices lower gso_max_size from 64KB to 16KB (r8152 is an example).

In order to probe an optimal cwnd, we want BBR not to be sensitive
to whatever GSO constraints a device may have.

This patch removes the tso_segs_goal() CC callback in favor of
min_tso_segs(), for congestion control modules that want to override
sysctl_tcp_min_tso_segs.

The next patch will remove bbr->tso_segs_goal since it does not need
to be persistent.
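
For a congestion control module, the change boils down to exporting only a
minimum segment count and leaving per-skb autosizing to tcp_output.c. As a
minimal sketch (hypothetical module and threshold, not part of this series):

static u32 example_min_tso_segs(struct sock *sk)
{
	/* assumed threshold: below ~1.2 Mbit/s (expressed here in bytes/sec),
	 * allow single-MSS skbs; otherwise ask for at least 2 segments.
	 */
	return sk->sk_pacing_rate < (1200000 >> 3) ? 1 : 2;
}

static struct tcp_congestion_ops example_cong_ops __read_mostly = {
	/* mandatory hooks (ssthresh, undo_cwnd, cong_avoid/cong_control) elided */
	.min_tso_segs	= example_min_tso_segs,
	.name		= "example",
	.owner		= THIS_MODULE,
};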

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/tcp.h     |  6 ++----
 net/ipv4/tcp_bbr.c    | 23 +++++++++++++----------
 net/ipv4/tcp_output.c | 15 ++++++++-------
 3 files changed, 23 insertions(+), 21 deletions(-)
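
A quick way to sanity-check the effect of the hunks below, outside the
kernel, is to mimic the per-skb goal computation before and after this
patch. The constants, MSS and pacing figures here are assumptions, not
values taken from the series:

#include <stdint.h>
#include <stdio.h>

/* Rough stand-ins for kernel constants; MAX_TCP_HEADER depends on config. */
#define GSO_MAX_SIZE	65536u
#define MAX_TCP_HEADER	320u

/* One pacing interval of data, capped by a GSO byte limit, then converted
 * to segments and clamped as BBR does (7-bit field, hence 0x7F).
 */
static uint32_t tso_goal(uint64_t pacing_rate, uint32_t pacing_shift,
			 uint32_t gso_limit, uint32_t mss, uint32_t min_segs)
{
	uint64_t bytes = pacing_rate >> pacing_shift;

	if (bytes > gso_limit - 1 - MAX_TCP_HEADER)
		bytes = gso_limit - 1 - MAX_TCP_HEADER;

	uint32_t segs = bytes / mss;
	if (segs < min_segs)
		segs = min_segs;
	return segs > 0x7F ? 0x7F : segs;
}

int main(void)
{
	/* ~1 Gbit/s of payload (125,000,000 bytes/s), default pacing shift
	 * of 10 (~1 ms of data), MSS 1448, minimum of 2 segments.
	 */
	printf("goal capped by 16KB gso_max_size: %u\n",
	       tso_goal(125000000ull, 10, 16384u, 1448u, 2u));		/* ~11 */
	printf("goal capped by 64KB GSO_MAX_SIZE: %u\n",
	       tso_goal(125000000ull, 10, GSO_MAX_SIZE, 1448u, 2u));	/* ~45 */
	return 0;
}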

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 92b06c6e7732ad7c61b580427fc085fa0dff1063..9c9b3768b350abfd51776563d220d5e97ca9da69 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
 #endif
 /* tcp_output.c */
 
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-		     int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -981,8 +979,8 @@ struct tcp_congestion_ops {
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
-	/* suggest number of segments for each skb to transmit (optional) */
-	u32 (*tso_segs_goal)(struct sock *sk);
+	/* override sysctl_tcp_min_tso_segs */
+	u32 (*min_tso_segs)(struct sock *sk);
 	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
 	u32 (*sndbuf_expand)(struct sock *sk);
 	/* call when packets are delivered to update cwnd and pacing rate,
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c82cddd11633fd4bfdbc6d84f4bcc..afc0567b8a98fbb718ba04505053aa3f62ab5784 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -261,23 +261,26 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 		sk->sk_pacing_rate = rate;
 }
 
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
 {
-	struct bbr *bbr = inet_csk_ca(sk);
-
-	return bbr->tso_segs_goal;
+	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
 
 static void bbr_set_tso_segs_goal(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u32 min_segs;
+	u32 segs, bytes;
+
+	/* Sort of tcp_tso_autosize() but ignoring
+	 * driver provided sk_gso_max_size.
+	 */
+	bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
-	min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
-	bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
-				 0x7FU);
+	bbr->tso_segs_goal = min(segs, 0x7FU);
 }
 
 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -936,7 +939,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
 	.undo_cwnd	= bbr_undo_cwnd,
 	.cwnd_event	= bbr_cwnd_event,
 	.ssthresh	= bbr_ssthresh,
-	.tso_segs_goal	= bbr_tso_segs_goal,
+	.min_tso_segs	= bbr_min_tso_segs,
 	.get_info	= bbr_get_info,
 	.set_state	= bbr_set_state,
 };
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 49d043de3476bdfcaf6e9a606d0da0f2094373a8..383cac0ff0ec059ca7dbc1a6304cc7f8183e008d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1703,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
 /* Return how many segs we'd like on a TSO packet,
  * to send one TSO packet per ms
  */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-		     int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+			    int min_tso_segs)
 {
 	u32 bytes, segs;
 
@@ -1720,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 
 	return segs;
 }
-EXPORT_SYMBOL(tcp_tso_autosize);
 
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
@@ -1728,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
 static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-	u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+	u32 min_tso, tso_segs;
 
-	if (!tso_segs)
-		tso_segs = tcp_tso_autosize(sk, mss_now,
-				sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+	min_tso = ca_ops->min_tso_segs ?
+			ca_ops->min_tso_segs(sk) :
+			sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+	tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
 	return min_t(u32, tso_segs, sk->sk_gso_max_segs);
 }
 
-- 
2.16.2.395.g2e18187dfd-goog


* [PATCH net-next 2/2] tcp_bbr: remove bbr->tso_segs_goal
  2018-02-28 22:40 [PATCH net-next 0/2] tcp_bbr: more GSO work Eric Dumazet
  2018-02-28 22:40 ` [PATCH net-next 1/2] tcp_bbr: better deal with suboptimal GSO (II) Eric Dumazet
@ 2018-02-28 22:40 ` Eric Dumazet
  2018-03-01  3:17   ` Neal Cardwell
  2018-02-28 22:42 ` [PATCH net-next 0/2] tcp_bbr: more GSO work Soheil Hassas Yeganeh
  2018-03-02  2:44 ` David Miller
  3 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2018-02-28 22:40 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Neal Cardwell, Yuchung Cheng, Soheil Hassas Yeganeh,
	Eric Dumazet, Eric Dumazet

Its value is computed and then immediately used;
there is no need to store it.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp_bbr.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index afc0567b8a98fbb718ba04505053aa3f62ab5784..c92014cb1e165df05651eb05f2133c2e9473d54f 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
 		packet_conservation:1,  /* use packet conservation? */
 		restore_cwnd:1,	     /* decided to revert cwnd to old value */
 		round_start:1,	     /* start of packet-timed tx->ack round? */
-		tso_segs_goal:7,     /* segments we want in each skb we send */
 		idle_restart:1,	     /* restarting after idle? */
 		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
-		unused:5,
+		unused:12,
 		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
 		lt_rtt_cnt:7,	     /* round trips in long-term interval */
 		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
@@ -267,10 +266,9 @@ static u32 bbr_min_tso_segs(struct sock *sk)
 	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
 
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct bbr *bbr = inet_csk_ca(sk);
 	u32 segs, bytes;
 
 	/* Sort of tcp_tso_autosize() but ignoring
@@ -280,7 +278,7 @@ static void bbr_set_tso_segs_goal(struct sock *sk)
 		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
 	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
-	bbr->tso_segs_goal = min(segs, 0x7FU);
+	return min(segs, 0x7FU);
 }
 
 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -351,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
 	cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
 
 	/* Allow enough full-sized skbs in flight to utilize end systems. */
-	cwnd += 3 * bbr->tso_segs_goal;
+	cwnd += 3 * bbr_tso_segs_goal(sk);
 
 	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
 	cwnd = (cwnd + 1) & ~1U;
@@ -827,7 +825,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
 
 	bw = bbr_bw(sk);
 	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
-	bbr_set_tso_segs_goal(sk);
 	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
 }
 
@@ -837,7 +834,6 @@ static void bbr_init(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->prior_cwnd = 0;
-	bbr->tso_segs_goal = 0;	 /* default segs per skb until first ACK */
 	bbr->rtt_cnt = 0;
 	bbr->next_rtt_delivered = 0;
 	bbr->prev_ca_state = TCP_CA_Open;
-- 
2.16.2.395.g2e18187dfd-goog


* Re: [PATCH net-next 0/2] tcp_bbr: more GSO work
  2018-02-28 22:40 [PATCH net-next 0/2] tcp_bbr: more GSO work Eric Dumazet
  2018-02-28 22:40 ` [PATCH net-next 1/2] tcp_bbr: better deal with suboptimal GSO (II) Eric Dumazet
  2018-02-28 22:40 ` [PATCH net-next 2/2] tcp_bbr: remove bbr->tso_segs_goal Eric Dumazet
@ 2018-02-28 22:42 ` Soheil Hassas Yeganeh
  2018-03-02  2:44 ` David Miller
  3 siblings, 0 replies; 7+ messages in thread
From: Soheil Hassas Yeganeh @ 2018-02-28 22:42 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, netdev, Neal Cardwell, Yuchung Cheng, Eric Dumazet

On Wed, Feb 28, 2018 at 5:40 PM Eric Dumazet <edumazet@google.com> wrote:

> Playing with an r8152 USB 1Gbit NIC, in both USB2 and USB3 ports,
> I found that BBR was performing poorly because TSO was limited to 16KB.

> This patch series makes sure BBR does not underestimate the number
> of packets needed to fill the pipe when a device has
> suboptimal TSO limits.

> Eric Dumazet (2):
>    tcp_bbr: better deal with suboptimal GSO (II)
>    tcp_bbr: remove bbr->tso_segs_goal

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

Thank you, Eric!

>   include/net/tcp.h     |  6 ++----
>   net/ipv4/tcp_bbr.c    | 33 ++++++++++++++++-----------------
>   net/ipv4/tcp_output.c | 15 ++++++++-------
>   3 files changed, 26 insertions(+), 28 deletions(-)

> --
> 2.16.2.395.g2e18187dfd-goog


* Re: [PATCH net-next 1/2] tcp_bbr: better deal with suboptimal GSO (II)
  2018-02-28 22:40 ` [PATCH net-next 1/2] tcp_bbr: better deal with suboptimal GSO (II) Eric Dumazet
@ 2018-03-01  3:15   ` Neal Cardwell
  0 siblings, 0 replies; 7+ messages in thread
From: Neal Cardwell @ 2018-03-01  3:15 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, netdev, Yuchung Cheng, Soheil Hassas Yeganeh,
	Eric Dumazet

On Wed, Feb 28, 2018 at 5:40 PM, Eric Dumazet <edumazet@google.com> wrote:
>
> This is the second part of dealing with suboptimal device GSO parameters.
> In the first patch (350c9f484bde "tcp_bbr: better deal with suboptimal GSO")
> we dealt with devices having a low gso_max_segs.
>
> Some devices lower gso_max_size from 64KB to 16KB (r8152 is an example).
>
> In order to probe an optimal cwnd, we want BBR not to be sensitive
> to whatever GSO constraints a device may have.
>
> This patch removes the tso_segs_goal() CC callback in favor of
> min_tso_segs(), for congestion control modules that want to override
> sysctl_tcp_min_tso_segs.
>
> The next patch will remove bbr->tso_segs_goal since it does not need
> to be persistent.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---

Acked-by: Neal Cardwell <ncardwell@google.com>

Looks great to me. Thanks, Eric!

neal


* Re: [PATCH net-next 2/2] tcp_bbr: remove bbr->tso_segs_goal
  2018-02-28 22:40 ` [PATCH net-next 2/2] tcp_bbr: remove bbr->tso_segs_goal Eric Dumazet
@ 2018-03-01  3:17   ` Neal Cardwell
  0 siblings, 0 replies; 7+ messages in thread
From: Neal Cardwell @ 2018-03-01  3:17 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, netdev, Yuchung Cheng, Soheil Hassas Yeganeh,
	Eric Dumazet

On Wed, Feb 28, 2018 at 5:40 PM, Eric Dumazet <edumazet@google.com> wrote:
> Its value is computed and then immediately used;
> there is no need to store it.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---

Acked-by: Neal Cardwell <ncardwell@google.com>

This one also looks great to me. Thanks, Eric!

neal


* Re: [PATCH net-next 0/2] tcp_bbr: more GSO work
  2018-02-28 22:40 [PATCH net-next 0/2] tcp_bbr: more GSO work Eric Dumazet
                   ` (2 preceding siblings ...)
  2018-02-28 22:42 ` [PATCH net-next 0/2] tcp_bbr: more GSO work Soheil Hassas Yeganeh
@ 2018-03-02  2:44 ` David Miller
  3 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2018-03-02  2:44 UTC (permalink / raw)
  To: edumazet; +Cc: netdev, ncardwell, ycheng, soheil, eric.dumazet

From: Eric Dumazet <edumazet@google.com>
Date: Wed, 28 Feb 2018 14:40:45 -0800

> Playing with an r8152 USB 1Gbit NIC, in both USB2 and USB3 ports,
> I found that BBR was performing poorly because TSO was limited to 16KB.
> 
> This patch series makes sure BBR does not underestimate the number
> of packets needed to fill the pipe when a device has
> suboptimal TSO limits.

Series applied, thanks Eric.


