All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next] DCTCP drop compatibility
@ 2017-02-14 17:05 Koen De Schepper
  2017-02-15 17:17 ` David Miller
  0 siblings, 1 reply; 2+ messages in thread
From: Koen De Schepper @ 2017-02-14 17:05 UTC (permalink / raw)
  To: netdev
  Cc: Daniel Borkmann, Florian Westphal, Glenn Judd, Yuchung Cheng,
	Neal Cardwell, Andrew Shewmaker, Lawrence Brakmo,
	Koen De Schepper

 Makes DCTCP respond to drop. Additional Reno-like
 response is added when a packet gets lost. The
 clamp_alpha_on_loss implementation was not working.
 The parameter was renamed into dctcp_drop_compatible
 to express the expected result, not the implementation
 mechanism.

 If the dctcp_drop_compatible parameter is enabled, and in
 this RTT at least one loss occurred, the window is reduced
 additionally by half. The motivation is that packet
 loss is an indicator of extreme congestion. In practice,
 this turned out to be both beneficial for overloaded
 datacenter ECN-configured queues, and classic configured
 drop queues (non-ECN) where it supports coexistence with
 classic TCPs such as Cubic and Reno.

 On a PI2 or PIE AQM bottleneck without ECN support in the
 AQM, following drop probabilities are measured for 4
 competing flows of the same congestion control:

   TCP  |    Drop    |  Link   |   RTT    |    Drop
   CC   | compatible |  speed  | base+AQM | probability
 =======|============|=========|==========|============
  CUBIC |      -     |  40Mbps |  7+20ms  |    0.21%
  RENO  |      -     |         |          |    0.19%
  DCTCP |      1     |         |          |    0.22%
  DCTCP |      0     |         |          |   25.8%
 -------|------------|---------|----------|------------
  CUBIC |      -     | 100Mbps |  7+20ms  |    0.03%
  RENO  |      -     |         |          |    0.02%
  DCTCP |      1     |         |          |    0.04%
  DCTCP |      0     |         |          |   23.3%
 -------|------------|---------|----------|------------
  CUBIC |      -     | 800Mbps |   1+1ms  |    0.04%
  RENO  |      -     |         |          |    0.05%
  DCTCP |      1     |         |          |    0.06%
  DCTCP |      0     |         |          |   18.7%

 These results show that DCTCP drives drop based queues
 to high levels of loss when dctcp_drop_compatible=0,
 and becomes compatible with Cubic and Reno when
 dctcp_drop_compatible=1.

 The parameter is default on, to protect network paths
 that do not support ECN. DCTCP in MS-Windows is similarly
 drop compatible by default. 

---
 net/ipv4/tcp_dctcp.c | 79 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 17 deletions(-)

diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 5f5e593..d1cf111 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -67,10 +67,10 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
 module_param(dctcp_alpha_on_init, uint, 0644);
 MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value");
 
-static unsigned int dctcp_clamp_alpha_on_loss __read_mostly;
-module_param(dctcp_clamp_alpha_on_loss, uint, 0644);
-MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss,
-		 "parameter for clamping alpha on loss");
+static unsigned int dctcp_drop_compatible __read_mostly = 1;
+module_param(dctcp_drop_compatible, uint, 0644);
+MODULE_PARM_DESC(dctcp_drop_compatible,
+		 "parameter for classic drop compatibility on loss");
 
 static struct tcp_congestion_ops dctcp_reno;
 
@@ -231,21 +231,62 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
 	}
 }
 
-static void dctcp_state(struct sock *sk, u8 new_state)
+static void dctcp_adjust_on_loss(struct sock *sk)
 {
-	if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) {
-		struct dctcp *ca = inet_csk_ca(sk);
+	/* If the dctcp_drop_compatible extension is enabled, and
+	 * this RTT at least one loss occurred, the window is reduced
+	 * additionally by half. The motivation is that packet
+	 * loss is an indicator of extreme congestion. In practice,
+	 * this turned out to be both beneficial for overloaded
+	 * datacenter ECN-configured queues, and classic configured
+	 * drop queues (non-ECN) where it supports coexistence with
+	 * classic TCPs such as Cubic and Reno.
+	 *
+	 * On a PI2 or PIE AQM bottleneck without ECN support in the
+	 * AQM, following drop probabilities are measured for 4
+	 * competing flows of the same congestion control:
+	 *
+	 *   TCP  |    Drop    |  Link   |   RTT    |    Drop
+	 *   CC   | compatible |  speed  | base+AQM | probability
+	 * =======|============|=========|==========|============
+	 *  CUBIC |      -     |  40Mbps |  7+20ms  |    0.21%
+	 *  RENO  |      -     |         |          |    0.19%
+	 *  DCTCP |      1     |         |          |    0.22%
+	 *  DCTCP |      0     |         |          |   25.8%
+	 * -------|------------|---------|----------|------------
+	 *  CUBIC |      -     | 100Mbps |  7+20ms  |    0.03%
+	 *  RENO  |      -     |         |          |    0.02%
+	 *  DCTCP |      1     |         |          |    0.04%
+	 *  DCTCP |      0     |         |          |   23.3%
+	 * -------|------------|---------|----------|------------
+	 *  CUBIC |      -     | 800Mbps |   1+1ms  |    0.04%
+	 *  RENO  |      -     |         |          |    0.05%
+	 *  DCTCP |      1     |         |          |    0.06%
+	 *  DCTCP |      0     |         |          |   18.7%
+	 *
+	 * These results show that DCTCP drives drop based queues
+	 * to high levels of loss when dctcp_drop_compatible=0,
+	 * and becomes compatible with Cubic and Reno when
+	 * dctcp_drop_compatible=1.
+	 */
 
-		/* If this extension is enabled, we clamp dctcp_alpha to
-		 * max on packet loss; the motivation is that dctcp_alpha
-		 * is an indicator to the extend of congestion and packet
-		 * loss is an indicator of extreme congestion; setting
-		 * this in practice turned out to be beneficial, and
-		 * effectively assumes total congestion which reduces the
-		 * window by half.
-		 */
-		ca->dctcp_alpha = DCTCP_MAX_ALPHA;
-	}
+	struct dctcp *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	ca->loss_cwnd = tp->snd_cwnd;
+	tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+}
+
+static void dctcp_state(struct sock *sk, u8 new_state)
+{
+	/* TCP_CA_LOSS is better handled via the event call back for 2 reasons:
+	 * - event is called immediately after call to ssthresh
+	 * - state is called also for MTU probe loss, which uses simple_ssthresh
+	 */
+	if (dctcp_drop_compatible &&
+	    (new_state == TCP_CA_Recovery) &&
+	    !(tcp_in_cwnd_reduction(sk)))
+		dctcp_adjust_on_loss(sk);
 }
 
 static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
@@ -280,6 +321,10 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 	case CA_EVENT_NON_DELAYED_ACK:
 		dctcp_update_ack_reserved(sk, ev);
 		break;
+	case CA_EVENT_LOSS:
+		if (dctcp_drop_compatible)
+			dctcp_adjust_on_loss(sk);
+		break;
 	default:
 		/* Don't care for the rest. */
 		break;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH net-next] DCTCP drop compatibility
  2017-02-14 17:05 [PATCH net-next] DCTCP drop compatibility Koen De Schepper
@ 2017-02-15 17:17 ` David Miller
  0 siblings, 0 replies; 2+ messages in thread
From: David Miller @ 2017-02-15 17:17 UTC (permalink / raw)
  To: koen0607
  Cc: netdev, daniel, fw, glenn.judd, ycheng, ncardwell, agshew,
	brakmo, koen.de_schepper


First, this needs to be reviewed by other DCTCP experts.

Second, it is missing a proper Signed-off-by: tag.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-02-15 17:17 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-14 17:05 [PATCH net-next] DCTCP drop compatibility Koen De Schepper
2017-02-15 17:17 ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.