All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 0/3] TCP trivial patches
@ 2007-02-12 16:03 Stephen Hemminger
  2007-02-12 16:03 ` [patch 1/3] tcp: cleanup of htcp Stephen Hemminger
                   ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Stephen Hemminger @ 2007-02-12 16:03 UTC (permalink / raw)
  To: Dave Miller; +Cc: netdev

Minor changes for 2.6.21

--


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [patch 1/3] tcp: cleanup of htcp
  2007-02-12 16:03 [patch 0/3] TCP trivial patches Stephen Hemminger
@ 2007-02-12 16:03 ` Stephen Hemminger
  2007-02-12 21:14   ` David Miller
  2007-02-12 16:03 ` [patch 2/3] tcp: use read mostly for CUBIC parameters Stephen Hemminger
  2007-02-12 16:03 ` [patch 3/3] tcp: remove experimental variants from default list Stephen Hemminger
  2 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2007-02-12 16:03 UTC (permalink / raw)
  To: Dave Miller; +Cc: netdev

[-- Attachment #1: htcp-cleanup.patch --]
[-- Type: text/plain, Size: 6504 bytes --]

Minor non-invasive cleanups:
 * white space around operators and line wrapping
 * use const
 * use __read_mostly

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/ipv4/tcp_htcp.c |   69 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 30 deletions(-)

--- tcp.orig/net/ipv4/tcp_htcp.c	2007-02-09 20:23:35.000000000 -0800
+++ tcp/net/ipv4/tcp_htcp.c	2007-02-12 07:25:56.000000000 -0800
@@ -10,22 +10,23 @@
 #include <linux/module.h>
 #include <net/tcp.h>
 
-#define ALPHA_BASE	(1<<7)  /* 1.0 with shift << 7 */
-#define BETA_MIN	(1<<6)  /* 0.5 with shift << 7 */
+#define ALPHA_BASE	(1<<7)	/* 1.0 with shift << 7 */
+#define BETA_MIN	(1<<6)	/* 0.5 with shift << 7 */
 #define BETA_MAX	102	/* 0.8 with shift << 7 */
 
-static int use_rtt_scaling = 1;
+static int use_rtt_scaling __read_mostly = 1;
 module_param(use_rtt_scaling, int, 0644);
 MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling");
 
-static int use_bandwidth_switch = 1;
+static int use_bandwidth_switch __read_mostly = 1;
 module_param(use_bandwidth_switch, int, 0644);
 MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
 
 struct htcp {
 	u32	alpha;		/* Fixed point arith, << 7 */
 	u8	beta;           /* Fixed point arith, << 7 */
-	u8	modeswitch;     /* Delay modeswitch until we had at least one congestion event */
+	u8	modeswitch;	/* Delay modeswitch
+				   until we had at least one congestion event */
 	u16	pkts_acked;
 	u32	packetcount;
 	u32	minRTT;
@@ -44,14 +45,14 @@
 	u32	lasttime;
 };
 
-static inline u32 htcp_cong_time(struct htcp *ca)
+static inline u32 htcp_cong_time(const struct htcp *ca)
 {
 	return jiffies - ca->last_cong;
 }
 
-static inline u32 htcp_ccount(struct htcp *ca)
+static inline u32 htcp_ccount(const struct htcp *ca)
 {
-	return htcp_cong_time(ca)/ca->minRTT;
+	return htcp_cong_time(ca) / ca->minRTT;
 }
 
 static inline void htcp_reset(struct htcp *ca)
@@ -67,10 +68,12 @@
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
+
 	ca->last_cong = ca->undo_last_cong;
 	ca->maxRTT = ca->undo_maxRTT;
 	ca->old_maxB = ca->undo_old_maxB;
-	return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
+
+	return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
 }
 
 static inline void measure_rtt(struct sock *sk)
@@ -78,17 +81,19 @@
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
-	u32 srtt = tp->srtt>>3;
+	u32 srtt = tp->srtt >> 3;
 
 	/* keep track of minimum RTT seen so far, minRTT is zero at first */
 	if (ca->minRTT > srtt || !ca->minRTT)
 		ca->minRTT = srtt;
 
 	/* max RTT */
-	if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
+	if (icsk->icsk_ca_state == TCP_CA_Open
+	    && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
 		if (ca->maxRTT < ca->minRTT)
 			ca->maxRTT = ca->minRTT;
-		if (ca->maxRTT < srtt && srtt <= ca->maxRTT+msecs_to_jiffies(20))
+		if (ca->maxRTT < srtt
+		    && srtt <= ca->maxRTT + msecs_to_jiffies(20))
 			ca->maxRTT = srtt;
 	}
 }
@@ -116,15 +121,16 @@
 
 	ca->packetcount += pkts_acked;
 
-	if (ca->packetcount >= tp->snd_cwnd - (ca->alpha>>7? : 1)
-			&& now - ca->lasttime >= ca->minRTT
-			&& ca->minRTT > 0) {
-		__u32 cur_Bi = ca->packetcount*HZ/(now - ca->lasttime);
+	if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1)
+	    && now - ca->lasttime >= ca->minRTT
+	    && ca->minRTT > 0) {
+		__u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
+
 		if (htcp_ccount(ca) <= 3) {
 			/* just after backoff */
 			ca->minB = ca->maxB = ca->Bi = cur_Bi;
 		} else {
-			ca->Bi = (3*ca->Bi + cur_Bi)/4;
+			ca->Bi = (3 * ca->Bi + cur_Bi) / 4;
 			if (ca->Bi > ca->maxB)
 				ca->maxB = ca->Bi;
 			if (ca->minB > ca->maxB)
@@ -142,7 +148,7 @@
 		u32 old_maxB = ca->old_maxB;
 		ca->old_maxB = ca->maxB;
 
-		if (!between(5*maxB, 4*old_maxB, 6*old_maxB)) {
+		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
 			ca->beta = BETA_MIN;
 			ca->modeswitch = 0;
 			return;
@@ -150,7 +156,7 @@
 	}
 
 	if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) {
-		ca->beta = (minRTT<<7)/maxRTT;
+		ca->beta = (minRTT << 7) / maxRTT;
 		if (ca->beta < BETA_MIN)
 			ca->beta = BETA_MIN;
 		else if (ca->beta > BETA_MAX)
@@ -169,23 +175,26 @@
 
 	if (diff > HZ) {
 		diff -= HZ;
-		factor = 1+ ( 10*diff + ((diff/2)*(diff/2)/HZ) )/HZ;
+		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / HZ)) / HZ;
 	}
 
 	if (use_rtt_scaling && minRTT) {
-		u32 scale = (HZ<<3)/(10*minRTT);
-		scale = min(max(scale, 1U<<2), 10U<<3); /* clamping ratio to interval [0.5,10]<<3 */
-		factor = (factor<<3)/scale;
+		u32 scale = (HZ << 3) / (10 * minRTT);
+
+		/* clamping ratio to interval [0.5,10]<<3 */
+		scale = min(max(scale, 1U << 2), 10U << 3);
+		factor = (factor << 3) / scale;
 		if (!factor)
 			factor = 1;
 	}
 
-	ca->alpha = 2*factor*((1<<7)-ca->beta);
+	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
 	if (!ca->alpha)
 		ca->alpha = ALPHA_BASE;
 }
 
-/* After we have the rtt data to calculate beta, we'd still prefer to wait one
+/*
+ * After we have the rtt data to calculate beta, we'd still prefer to wait one
  * rtt before we adjust our beta to ensure we are working from a consistent
  * data.
  *
@@ -202,15 +211,16 @@
 	htcp_beta_update(ca, minRTT, maxRTT);
 	htcp_alpha_update(ca);
 
-	/* add slowly fading memory for maxRTT to accommodate routing changes etc */
+	/* add slowly fading memory for maxRTT to accommodate routing changes */
 	if (minRTT > 0 && maxRTT > minRTT)
-		ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100;
+		ca->maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
 }
 
 static u32 htcp_recalc_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct htcp *ca = inet_csk_ca(sk);
+
 	htcp_param_update(sk);
 	return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
 }
@@ -224,16 +234,15 @@
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-        if (tp->snd_cwnd <= tp->snd_ssthresh)
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
 		tcp_slow_start(tp);
 	else {
-
 		measure_rtt(sk);
 
 		/* In dangerous area, increase slowly.
 		 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
 		 */
-		if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) {
+		if ((tp->snd_cwnd_cnt * ca->alpha) >> 7 >= tp->snd_cwnd) {
 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
 				tp->snd_cwnd++;
 			tp->snd_cwnd_cnt = 0;

--


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [patch 2/3] tcp: use read mostly for CUBIC parameters
  2007-02-12 16:03 [patch 0/3] TCP trivial patches Stephen Hemminger
  2007-02-12 16:03 ` [patch 1/3] tcp: cleanup of htcp Stephen Hemminger
@ 2007-02-12 16:03 ` Stephen Hemminger
  2007-02-12 21:15   ` David Miller
  2007-02-12 16:03 ` [patch 3/3] tcp: remove experimental variants from default list Stephen Hemminger
  2 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2007-02-12 16:03 UTC (permalink / raw)
  To: Dave Miller; +Cc: netdev

[-- Attachment #1: bic-mostly.patch --]
[-- Type: text/plain, Size: 1342 bytes --]

These module parameters should be in the read mostly area
to avoid cache pollution.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/ipv4/tcp_cubic.c |   20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

--- tcp.orig/net/ipv4/tcp_cubic.c	2007-02-12 07:28:55.000000000 -0800
+++ tcp/net/ipv4/tcp_cubic.c	2007-02-12 07:32:33.000000000 -0800
@@ -26,16 +26,16 @@
 					  */
 #define	BICTCP_HZ		10	/* BIC HZ 2^10 = 1024 */
 
-static int fast_convergence = 1;
-static int max_increment = 16;
-static int beta = 819;		/* = 819/1024 (BICTCP_BETA_SCALE) */
-static int initial_ssthresh = 100;
-static int bic_scale = 41;
-static int tcp_friendliness = 1;
-
-static u32 cube_rtt_scale;
-static u32 beta_scale;
-static u64 cube_factor;
+static int fast_convergence __read_mostly = 1;
+static int max_increment __read_mostly = 16;
+static int beta __read_mostly = 819;	/* = 819/1024 (BICTCP_BETA_SCALE) */
+static int initial_ssthresh __read_mostly = 100;
+static int bic_scale __read_mostly = 41;
+static int tcp_friendliness __read_mostly = 1;
+
+static u32 cube_rtt_scale __read_mostly;
+static u32 beta_scale __read_mostly;
+static u64 cube_factor __read_mostly;
 
 /* Note parameters that are used for precomputing scale factors are read-only */
 module_param(fast_convergence, int, 0644);

--


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 16:03 [patch 0/3] TCP trivial patches Stephen Hemminger
  2007-02-12 16:03 ` [patch 1/3] tcp: cleanup of htcp Stephen Hemminger
  2007-02-12 16:03 ` [patch 2/3] tcp: use read mostly for CUBIC parameters Stephen Hemminger
@ 2007-02-12 16:03 ` Stephen Hemminger
  2007-02-12 19:11   ` Baruch Even
  2 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2007-02-12 16:03 UTC (permalink / raw)
  To: Dave Miller; +Cc: netdev

[-- Attachment #1: no-vegas-default.patch --]
[-- Type: text/plain, Size: 1066 bytes --]

The TCP Vegas implementation is buggy, and BIC is too agressive
so they should not be in the default list. Westwood is okay, but
not well tested.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>

---
 net/ipv4/Kconfig |   10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

--- tcp.orig/net/ipv4/Kconfig	2007-02-12 07:58:28.000000000 -0800
+++ tcp/net/ipv4/Kconfig	2007-02-12 08:00:22.000000000 -0800
@@ -581,21 +581,12 @@
 	  Select the TCP congestion control that will be used by default
 	  for all connections.
 
-	config DEFAULT_BIC
-		bool "Bic" if TCP_CONG_BIC=y
-
 	config DEFAULT_CUBIC
 		bool "Cubic" if TCP_CONG_CUBIC=y
 
 	config DEFAULT_HTCP
 		bool "Htcp" if TCP_CONG_HTCP=y
 
-	config DEFAULT_VEGAS
-		bool "Vegas" if TCP_CONG_VEGAS=y
-
-	config DEFAULT_WESTWOOD
-		bool "Westwood" if TCP_CONG_WESTWOOD=y
-
 	config DEFAULT_RENO
 		bool "Reno"
 
@@ -603,6 +594,7 @@
 
 endif
 
+# This sets the default value for those who do not choose advanced menu
 config TCP_CONG_CUBIC
 	tristate
 	depends on !TCP_CONG_ADVANCED

--


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 16:03 ` [patch 3/3] tcp: remove experimental variants from default list Stephen Hemminger
@ 2007-02-12 19:11   ` Baruch Even
  2007-02-12 20:13     ` Ian McDonald
  2007-02-12 20:20     ` David Miller
  0 siblings, 2 replies; 29+ messages in thread
From: Baruch Even @ 2007-02-12 19:11 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Dave Miller, netdev

* Stephen Hemminger <shemminger@linux-foundation.org> [070212 18:04]:
> The TCP Vegas implementation is buggy, and BIC is too agressive
> so they should not be in the default list. Westwood is okay, but
> not well tested.

Since no one really agrees on the relative merits and problems of the
different algorithms and since the users themselves dont know, dont care
and have no clue on what should be the correct behaviour to report bugs
(see the old bic bugs, the htcp bugs, the recent sack bugs) I would
suggest to avoid making the whole internet a guinea pig and get back to
reno. If someone really needs to push high BDP flows he should test it
himself and choose what works for his kernel at the time.

For myself and anyone who asks me I recommend to set the default to
reno. For the few who really need high speed flows, they should test
kernel and protocol combination.

Baruch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 19:11   ` Baruch Even
@ 2007-02-12 20:13     ` Ian McDonald
  2007-02-12 20:26       ` Stephen Hemminger
  2007-02-12 20:32       ` David Miller
  2007-02-12 20:20     ` David Miller
  1 sibling, 2 replies; 29+ messages in thread
From: Ian McDonald @ 2007-02-12 20:13 UTC (permalink / raw)
  To: Baruch Even; +Cc: Stephen Hemminger, Dave Miller, netdev

On 2/13/07, Baruch Even <baruch@ev-en.org> wrote:
> * Stephen Hemminger <shemminger@linux-foundation.org> [070212 18:04]:
> > The TCP Vegas implementation is buggy, and BIC is too agressive
> > so they should not be in the default list. Westwood is okay, but
> > not well tested.
>
> Since no one really agrees on the relative merits and problems of the
> different algorithms and since the users themselves dont know, dont care
> and have no clue on what should be the correct behaviour to report bugs
> (see the old bic bugs, the htcp bugs, the recent sack bugs) I would
> suggest to avoid making the whole internet a guinea pig and get back to
> reno. If someone really needs to push high BDP flows he should test it
> himself and choose what works for his kernel at the time.
>
> For myself and anyone who asks me I recommend to set the default to
> reno. For the few who really need high speed flows, they should test
> kernel and protocol combination.
>
> Baruch

I agree wholeheartedly with Baruch. If we are going to remove BIC as
default we should go back to Reno as Cubic is even less tested in
production use than BIC.

Unless of course the papers you saw at PFLDNET showed that Cubic was a
really good choice and you want to point us to those papers.

Ian
-- 
Web: http://wand.net.nz/~iam4
Blog: http://iansblog.jandi.co.nz
WAND Network Research Group

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 19:11   ` Baruch Even
  2007-02-12 20:13     ` Ian McDonald
@ 2007-02-12 20:20     ` David Miller
  2007-02-12 22:12       ` Baruch Even
  1 sibling, 1 reply; 29+ messages in thread
From: David Miller @ 2007-02-12 20:20 UTC (permalink / raw)
  To: baruch; +Cc: shemminger, netdev

From: Baruch Even <baruch@ev-en.org>
Date: Mon, 12 Feb 2007 21:11:01 +0200

> Since no one really agrees on the relative merits and problems of the
> different algorithms and since the users themselves dont know, dont care
> and have no clue on what should be the correct behaviour to report bugs
> (see the old bic bugs, the htcp bugs, the recent sack bugs) I would
> suggest to avoid making the whole internet a guinea pig and get back to
> reno. If someone really needs to push high BDP flows he should test it
> himself and choose what works for his kernel at the time.
> 
> For myself and anyone who asks me I recommend to set the default to
> reno. For the few who really need high speed flows, they should test
> kernel and protocol combination.

We have "high BDP flows" even going from between the east and the west
coast of the United States.

This doesn't even begin to touch upon extremely well connected
coutries like South Korea and what happens when people there try to
access sites in Europe or the US.

Good high BDP flow handling is necessary now and for everyday usage of
the internet, it's not some obscure thing only researchers in fancy
labs need.

This also isn't the internet of 15 years ago where IETF members can
spend 4 or 5 years masterbating over new ideas before deploying them.
I know that's what conservative folks want, but it isn't going to
happen.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 20:13     ` Ian McDonald
@ 2007-02-12 20:26       ` Stephen Hemminger
  2007-02-12 20:34         ` David Miller
  2007-02-12 20:32       ` David Miller
  1 sibling, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2007-02-12 20:26 UTC (permalink / raw)
  To: Ian McDonald; +Cc: Baruch Even, Dave Miller, netdev

On Tue, 13 Feb 2007 09:13:52 +1300
"Ian McDonald" <ian.mcdonald@jandi.co.nz> wrote:

> On 2/13/07, Baruch Even <baruch@ev-en.org> wrote:
> > * Stephen Hemminger <shemminger@linux-foundation.org> [070212 18:04]:
> > > The TCP Vegas implementation is buggy, and BIC is too agressive
> > > so they should not be in the default list. Westwood is okay, but
> > > not well tested.
> >
> > Since no one really agrees on the relative merits and problems of the
> > different algorithms and since the users themselves dont know, dont care
> > and have no clue on what should be the correct behaviour to report bugs
> > (see the old bic bugs, the htcp bugs, the recent sack bugs) I would
> > suggest to avoid making the whole internet a guinea pig and get back to
> > reno. If someone really needs to push high BDP flows he should test it
> > himself and choose what works for his kernel at the time.
> >
> > For myself and anyone who asks me I recommend to set the default to
> > reno. For the few who really need high speed flows, they should test
> > kernel and protocol combination.
> >
> > Baruch
> 
> I agree wholeheartedly with Baruch. If we are going to remove BIC as
> default we should go back to Reno as Cubic is even less tested in
> production use than BIC.
> 
> Unless of course the papers you saw at PFLDNET showed that Cubic was a
> really good choice and you want to point us to those papers.

No magic paper. But the impression from multiple talks is that Cubic
is still doing fine.  Also for non high speed flows, it really doesn't matter
because all the loss based congestion controls behave the same.

-- 
Stephen Hemminger <shemminger@linux-foundation.org>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 20:13     ` Ian McDonald
  2007-02-12 20:26       ` Stephen Hemminger
@ 2007-02-12 20:32       ` David Miller
  2007-02-12 20:37         ` Stephen Hemminger
  2007-02-12 21:05         ` Ian McDonald
  1 sibling, 2 replies; 29+ messages in thread
From: David Miller @ 2007-02-12 20:32 UTC (permalink / raw)
  To: ian.mcdonald; +Cc: baruch, shemminger, netdev

From: "Ian McDonald" <ian.mcdonald@jandi.co.nz>
Date: Tue, 13 Feb 2007 09:13:52 +1300

> Unless of course the papers you saw at PFLDNET showed that Cubic was a
> really good choice and you want to point us to those papers.

I heavily dislike all of these "reactionary" patches from Stephen
after he attended PFDLNET.

If he never went there, none of these patches would have been
proposed.  He went to the sermon and he became converted :-)

We want people to play with this stuff, and they can experiment
regardless of whatever options or even code we put into the kernel.
Every user can muck with the congestion control on their computer
however they want, and THAT'S GOOD!

Sure we indirectly recommend to distribution vendors what to use
by default by the Kconfig defaults we put into the vanilla tree,
and that's fine too.

Even after reading all of the papers, I still think CUBIC or even BIC
by default is not all that controbersal or radical thing to use by
default.

I'm sorry if the researchers and IETF folks don't like this.  Too bad,
get over it.

If you use RENO you're stupid, since performance is going to stink for
absolutely normal connections.  Fact: high BDP pipes are everywhere,
even grandma has one.  So just taking out the best solution we have
for that problem currently because it's not perfect is not the answer.

This is not the internet of 15 years ago, please wake up everyone.
We cannot sit on eggs for 5 years to make sure they hatch perfectly
like was previously possible.

I think the best thing we ever did was create the congestion control
algorithm abstraction and add a bunch of reasonable algorithms, and
then on top of that try to use something modern by default.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 20:26       ` Stephen Hemminger
@ 2007-02-12 20:34         ` David Miller
  0 siblings, 0 replies; 29+ messages in thread
From: David Miller @ 2007-02-12 20:34 UTC (permalink / raw)
  To: shemminger; +Cc: ian.mcdonald, baruch, netdev

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Feb 2007 12:26:34 -0800

> No magic paper. But the impression from multiple talks is that Cubic
> is still doing fine.  Also for non high speed flows, it really doesn't matter
> because all the loss based congestion controls behave the same.

This last point is important.

Either it's high BDP and it uses the new stuff (and we WANT it to)
or it's not high BDP and it USES RENO!

That's why Baruch's email is totally illogical.

If people don't have high BDP, these algorithms such as CUBIC and BIC
change nothing compared to Reno, they purposely behave precisely the
same.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 20:32       ` David Miller
@ 2007-02-12 20:37         ` Stephen Hemminger
  2007-02-12 20:47           ` David Miller
  2007-02-12 21:05         ` Ian McDonald
  1 sibling, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2007-02-12 20:37 UTC (permalink / raw)
  To: David Miller; +Cc: ian.mcdonald, baruch, netdev

On Mon, 12 Feb 2007 12:32:40 -0800 (PST)
David Miller <davem@davemloft.net> wrote:

> From: "Ian McDonald" <ian.mcdonald@jandi.co.nz>
> Date: Tue, 13 Feb 2007 09:13:52 +1300
> 
> > Unless of course the papers you saw at PFLDNET showed that Cubic was a
> > really good choice and you want to point us to those papers.
> 
> I heavily dislike all of these "reactionary" patches from Stephen
> after he attended PFDLNET.
> 
> If he never went there, none of these patches would have been
> proposed.  He went to the sermon and he became converted :-)

My patches weren't reactionary. Going to pure old Reno is reactionary.
It was more looking at the state of the code on the flight back
and cleaning house. Others were/are reactionary. 


> We want people to play with this stuff, and they can experiment
> regardless of whatever options or even code we put into the kernel.
> Every user can muck with the congestion control on their computer
> however they want, and THAT'S GOOD!
> 
> Sure we indirectly recommend to distribution vendors what to use
> by default by the Kconfig defaults we put into the vanilla tree,
> and that's fine too.
> 
> Even after reading all of the papers, I still think CUBIC or even BIC
> by default is not all that controbersal or radical thing to use by
> default.
> 
> I'm sorry if the researchers and IETF folks don't like this.  Too bad,
> get over it.

I push the problem back in their court: "Why do you not have a process
that causes consensus?" IETF has done nothing to create any incentive
for long term cooperation.

> If you use RENO you're stupid, since performance is going to stink for
> absolutely normal connections.  Fact: high BDP pipes are everywhere,
> even grandma has one.  So just taking out the best solution we have
> for that problem currently because it's not perfect is not the answer.
> 

Do I need to dig out the "Why Reno sucks" graphs?

-- 
Stephen Hemminger <shemminger@linux-foundation.org>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 20:37         ` Stephen Hemminger
@ 2007-02-12 20:47           ` David Miller
  0 siblings, 0 replies; 29+ messages in thread
From: David Miller @ 2007-02-12 20:47 UTC (permalink / raw)
  To: shemminger; +Cc: ian.mcdonald, baruch, netdev

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Feb 2007 12:37:13 -0800

> My patches weren't reactionary. Going to pure old Reno is reactionary.
> It was more looking at the state of the code on the flight back
> and cleaning house. Others were/are reactionary. 

Ok.

The only patch I have a real problem with is the DEFAULT_*
removals, the choices are frankly arbitrary.

Vegas is buggy, that's nice, why don't we simply fix the
bugs in our implementation?

Westwood is very conservative, frankly, and I therefore see
no reason it cannot be offered as a default either.

HTCP doesn't do anything earth shattering either.

I think the whole suite of algorithms in that list are
reasonable.

And even re-reading your patch, you're messing with the
DEFAULT_* setting for the case where the user selected
TCP_CONG_ADVANCED.

I think TCP_CONG_ADVANCED implies an intention by the user,
and if he wants to choose one of those listed as a default
why should we stop them?

The distributions take the default we recommend, and that's
all that matters for wide deployment.

> I push the problem back in their court: "Why do you not have a process
> that causes consensus?" IETF has done nothing to create any incentive
> for long term cooperation.

Yep, this is a good point.

> Do I need to dig out the "Why Reno sucks" graphs?

Hehe :-)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 20:32       ` David Miller
  2007-02-12 20:37         ` Stephen Hemminger
@ 2007-02-12 21:05         ` Ian McDonald
  1 sibling, 0 replies; 29+ messages in thread
From: Ian McDonald @ 2007-02-12 21:05 UTC (permalink / raw)
  To: David Miller; +Cc: baruch, shemminger, netdev

On 2/13/07, David Miller <davem@davemloft.net> wrote:
> This is not the internet of 15 years ago, please wake up everyone.
> We cannot sit on eggs for 5 years to make sure they hatch perfectly
> like was previously possible.
>
OK. I get the point. I am more conservative by nature and more of an academic.

Now there's been some explanation I'm happier for the change to go
ahead. I just think changes like this that affect the Internet should
be discussed a little. Now it's been discussed a little I feel better.

Ian
-- 
Web: http://wand.net.nz/~iam4
Blog: http://iansblog.jandi.co.nz
WAND Network Research Group

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 1/3] tcp: cleanup of htcp
  2007-02-12 16:03 ` [patch 1/3] tcp: cleanup of htcp Stephen Hemminger
@ 2007-02-12 21:14   ` David Miller
  2007-02-12 21:28     ` [patch 1/3] tcp: cleanup of htcp (resend) Stephen Hemminger
  0 siblings, 1 reply; 29+ messages in thread
From: David Miller @ 2007-02-12 21:14 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Feb 2007 08:03:18 -0800

> Minor non-invasive cleanups:
>  * white space around operators and line wrapping
>  * use const
>  * use __read_mostly
> 
> Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>

Yoshifuji fixed all the whitespace issues in the entire
net/ subtree the other day using an automated script,
so you'll need to re-patch this against Linus's current
tree as it doesn't apply cleanly.

Thanks.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 2/3] tcp: use read mostly for CUBIC parameters
  2007-02-12 16:03 ` [patch 2/3] tcp: use read mostly for CUBIC parameters Stephen Hemminger
@ 2007-02-12 21:15   ` David Miller
  0 siblings, 0 replies; 29+ messages in thread
From: David Miller @ 2007-02-12 21:15 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Feb 2007 08:03:19 -0800

> These module parameters should be in the read mostly area
> to avoid cache pollution.
> 
> Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>

Applied, thanks Stephen.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [patch 1/3] tcp: cleanup of htcp (resend)
  2007-02-12 21:14   ` David Miller
@ 2007-02-12 21:28     ` Stephen Hemminger
  2007-02-12 21:34       ` David Miller
  0 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2007-02-12 21:28 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Minor non-invasive cleanups:
 * white space around operators and line wrapping
 * use const
 * use __read_mostly

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/ipv4/tcp_htcp.c |   65 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

--- tcp.orig/net/ipv4/tcp_htcp.c	2007-02-12 12:32:55.000000000 -0800
+++ tcp/net/ipv4/tcp_htcp.c	2007-02-12 12:46:48.000000000 -0800
@@ -10,22 +10,23 @@
 #include <linux/module.h>
 #include <net/tcp.h>
 
-#define ALPHA_BASE	(1<<7)  /* 1.0 with shift << 7 */
-#define BETA_MIN	(1<<6)  /* 0.5 with shift << 7 */
+#define ALPHA_BASE	(1<<7)	/* 1.0 with shift << 7 */
+#define BETA_MIN	(1<<6)	/* 0.5 with shift << 7 */
 #define BETA_MAX	102	/* 0.8 with shift << 7 */
 
-static int use_rtt_scaling = 1;
+static int use_rtt_scaling __read_mostly = 1;
 module_param(use_rtt_scaling, int, 0644);
 MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling");
 
-static int use_bandwidth_switch = 1;
+static int use_bandwidth_switch __read_mostly = 1;
 module_param(use_bandwidth_switch, int, 0644);
 MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
 
 struct htcp {
 	u32	alpha;		/* Fixed point arith, << 7 */
 	u8	beta;           /* Fixed point arith, << 7 */
-	u8	modeswitch;     /* Delay modeswitch until we had at least one congestion event */
+	u8	modeswitch;	/* Delay modeswitch
+				   until we had at least one congestion event */
 	u16	pkts_acked;
 	u32	packetcount;
 	u32	minRTT;
@@ -44,14 +45,14 @@
 	u32	lasttime;
 };
 
-static inline u32 htcp_cong_time(struct htcp *ca)
+static inline u32 htcp_cong_time(const struct htcp *ca)
 {
 	return jiffies - ca->last_cong;
 }
 
-static inline u32 htcp_ccount(struct htcp *ca)
+static inline u32 htcp_ccount(const struct htcp *ca)
 {
-	return htcp_cong_time(ca)/ca->minRTT;
+	return htcp_cong_time(ca) / ca->minRTT;
 }
 
 static inline void htcp_reset(struct htcp *ca)
@@ -67,10 +68,12 @@
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
+
 	ca->last_cong = ca->undo_last_cong;
 	ca->maxRTT = ca->undo_maxRTT;
 	ca->old_maxB = ca->undo_old_maxB;
-	return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
+
+	return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
 }
 
 static inline void measure_rtt(struct sock *sk)
@@ -78,17 +81,19 @@
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
-	u32 srtt = tp->srtt>>3;
+	u32 srtt = tp->srtt >> 3;
 
 	/* keep track of minimum RTT seen so far, minRTT is zero at first */
 	if (ca->minRTT > srtt || !ca->minRTT)
 		ca->minRTT = srtt;
 
 	/* max RTT */
-	if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
+	if (icsk->icsk_ca_state == TCP_CA_Open
+	    && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
 		if (ca->maxRTT < ca->minRTT)
 			ca->maxRTT = ca->minRTT;
-		if (ca->maxRTT < srtt && srtt <= ca->maxRTT+msecs_to_jiffies(20))
+		if (ca->maxRTT < srtt
+		    && srtt <= ca->maxRTT + msecs_to_jiffies(20))
 			ca->maxRTT = srtt;
 	}
 }
@@ -116,15 +121,16 @@
 
 	ca->packetcount += pkts_acked;
 
-	if (ca->packetcount >= tp->snd_cwnd - (ca->alpha>>7? : 1)
-			&& now - ca->lasttime >= ca->minRTT
-			&& ca->minRTT > 0) {
-		__u32 cur_Bi = ca->packetcount*HZ/(now - ca->lasttime);
+	if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1)
+	    && now - ca->lasttime >= ca->minRTT
+	    && ca->minRTT > 0) {
+		__u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
+
 		if (htcp_ccount(ca) <= 3) {
 			/* just after backoff */
 			ca->minB = ca->maxB = ca->Bi = cur_Bi;
 		} else {
-			ca->Bi = (3*ca->Bi + cur_Bi)/4;
+			ca->Bi = (3 * ca->Bi + cur_Bi) / 4;
 			if (ca->Bi > ca->maxB)
 				ca->maxB = ca->Bi;
 			if (ca->minB > ca->maxB)
@@ -142,7 +148,7 @@
 		u32 old_maxB = ca->old_maxB;
 		ca->old_maxB = ca->maxB;
 
-		if (!between(5*maxB, 4*old_maxB, 6*old_maxB)) {
+		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
 			ca->beta = BETA_MIN;
 			ca->modeswitch = 0;
 			return;
@@ -150,7 +156,7 @@
 	}
 
 	if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) {
-		ca->beta = (minRTT<<7)/maxRTT;
+		ca->beta = (minRTT << 7) / maxRTT;
 		if (ca->beta < BETA_MIN)
 			ca->beta = BETA_MIN;
 		else if (ca->beta > BETA_MAX)
@@ -169,23 +175,26 @@
 
 	if (diff > HZ) {
 		diff -= HZ;
-		factor = 1+ ( 10*diff + ((diff/2)*(diff/2)/HZ) )/HZ;
+		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / HZ)) / HZ;
 	}
 
 	if (use_rtt_scaling && minRTT) {
-		u32 scale = (HZ<<3)/(10*minRTT);
-		scale = min(max(scale, 1U<<2), 10U<<3); /* clamping ratio to interval [0.5,10]<<3 */
-		factor = (factor<<3)/scale;
+		u32 scale = (HZ << 3) / (10 * minRTT);
+
+		/* clamping ratio to interval [0.5,10]<<3 */
+		scale = min(max(scale, 1U << 2), 10U << 3);
+		factor = (factor << 3) / scale;
 		if (!factor)
 			factor = 1;
 	}
 
-	ca->alpha = 2*factor*((1<<7)-ca->beta);
+	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
 	if (!ca->alpha)
 		ca->alpha = ALPHA_BASE;
 }
 
-/* After we have the rtt data to calculate beta, we'd still prefer to wait one
+/*
+ * After we have the rtt data to calculate beta, we'd still prefer to wait one
  * rtt before we adjust our beta to ensure we are working from a consistent
  * data.
  *
@@ -202,15 +211,16 @@
 	htcp_beta_update(ca, minRTT, maxRTT);
 	htcp_alpha_update(ca);
 
-	/* add slowly fading memory for maxRTT to accommodate routing changes etc */
+	/* add slowly fading memory for maxRTT to accommodate routing changes */
 	if (minRTT > 0 && maxRTT > minRTT)
-		ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100;
+		ca->maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
 }
 
 static u32 htcp_recalc_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct htcp *ca = inet_csk_ca(sk);
+
 	htcp_param_update(sk);
 	return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
 }
@@ -227,7 +237,6 @@
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
 		tcp_slow_start(tp);
 	else {
-
 		measure_rtt(sk);
 
 		/* In dangerous area, increase slowly.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 1/3] tcp: cleanup of htcp (resend)
  2007-02-12 21:28     ` [patch 1/3] tcp: cleanup of htcp (resend) Stephen Hemminger
@ 2007-02-12 21:34       ` David Miller
  0 siblings, 0 replies; 29+ messages in thread
From: David Miller @ 2007-02-12 21:34 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Feb 2007 13:28:22 -0800

> Minor non-invasive cleanups:
>  * white space around operators and line wrapping
>  * use const
>  * use __read_mostly
> 
> Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>

Applied, thanks Stephen.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 20:20     ` David Miller
@ 2007-02-12 22:12       ` Baruch Even
  2007-02-12 22:53         ` David Miller
  0 siblings, 1 reply; 29+ messages in thread
From: Baruch Even @ 2007-02-12 22:12 UTC (permalink / raw)
  To: David Miller; +Cc: shemminger, netdev

* David Miller <davem@davemloft.net> [070212 22:21]:
> From: Baruch Even <baruch@ev-en.org>
> Date: Mon, 12 Feb 2007 21:11:01 +0200
> 
> > Since no one really agrees on the relative merits and problems of the
> > different algorithms and since the users themselves dont know, dont care
> > and have no clue on what should be the correct behaviour to report bugs
> > (see the old bic bugs, the htcp bugs, the recent sack bugs) I would
> > suggest to avoid making the whole internet a guinea pig and get back to
> > reno. If someone really needs to push high BDP flows he should test it
> > himself and choose what works for his kernel at the time.
> > 
> > For myself and anyone who asks me I recommend to set the default to
> > reno. For the few who really need high speed flows, they should test
> > kernel and protocol combination.
> 
> We have "high BDP flows" even going from between the east and the west
> coast of the United States.
> 
> This doesn't even begin to touch upon extremely well connected
> coutries like South Korea and what happens when people there try to
> access sites in Europe or the US.
> 
> Good high BDP flow handling is necessary now and for everyday usage of
> the internet, it's not some obscure thing only researchers in fancy
> labs need.
> 
> This also isn't the internet of 15 years ago where IETF members can
> spend 4 or 5 years masterbating over new ideas before deploying them.
> I know that's what conservative folks want, but it isn't going to
> happen.

The problem is that you actually put a mostly untested algorithm as the
default for everyone to use. The BIC example is important, it was the
default algorithm for a long while and had implementation bugs that no
one cared for. The behaviour of cubic wasn't properly verified as the
algorithm in the linux kernel is not the one that was actually proposed
and you intend to make it the default without sufficient testing, that
seems to me to be quite unreasonable.

As to the reasoning that the new algorithms are supposed to act like
Reno, that needs to be verified as well, it's not evident from the code
itself.

Baruch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 22:12       ` Baruch Even
@ 2007-02-12 22:53         ` David Miller
  2007-02-13  9:56           ` Baruch Even
  0 siblings, 1 reply; 29+ messages in thread
From: David Miller @ 2007-02-12 22:53 UTC (permalink / raw)
  To: baruch; +Cc: shemminger, netdev

From: Baruch Even <baruch@ev-en.org>
Date: Tue, 13 Feb 2007 00:12:41 +0200

> The problem is that you actually put a mostly untested algorithm as the
> default for everyone to use. The BIC example is important, it was the
> default algorithm for a long while and had implementation bugs that no
> one cared for.

And if our TCP Reno implementation had some bugs, what should
we change the default to?  This is just idiotic logic.

These kinds of comments are just wanking, and lead to nowhere,
so please kill the noise.

If we have bugs in a particular algorithm, we should just fix
them.

> As to the reasoning that the new algorithms are supposed to act like
> Reno, that needs to be verified as well, it's not evident from the
> code itself.

If you're not convinced and are intrested enough, then you should go
verify whether it is in fact true.  This is how things work.

If you find that it isn't true, great then we'll know about it and
have an opportunity to fix the bug.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-12 22:53         ` David Miller
@ 2007-02-13  9:56           ` Baruch Even
  2007-02-13 16:49             ` SANGTAE HA
                               ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Baruch Even @ 2007-02-13  9:56 UTC (permalink / raw)
  To: David Miller; +Cc: shemminger, netdev

* David Miller <davem@davemloft.net> [070213 00:53]:
> From: Baruch Even <baruch@ev-en.org>
> Date: Tue, 13 Feb 2007 00:12:41 +0200
> 
> > The problem is that you actually put a mostly untested algorithm as the
> > default for everyone to use. The BIC example is important, it was the
> > default algorithm for a long while and had implementation bugs that no
> > one cared for.
> 
> And if our TCP Reno implementation had some bugs, what should
> we change the default to?  This is just idiotic logic.
> 
> These kinds of comments are just wanking, and lead to nowhere,
> so please kill the noise.
> 
> If we have bugs in a particular algorithm, we should just fix
> them.

I hope you've finished attempting to insult me. But I hope it won't
prevent you from getting back to the topic. The above quote of me was a
prelude to show the repeat behaviour where bic was added without
testing, modified by Stephen and made default with no serious testing of
what was put in the kernel.

It seems this happens again no with cubic. And you failed to respond to
this issue.

> > The behaviour of cubic wasn't properly verified as the
> > algorithm in the linux kernel is not the one that was actually proposed
> > and you intend to make it the default without sufficient testing, that
> > seems to me to be quite unreasonable.

According to claims of Doug Leith the cubic algorithm that is in the
kernel is different from what was proposed and tested. That's an
important issue which is deflected by personal attacks.

My main gripe is that there is a run to make an untested algorithm the
default for all Linux installations. And saying that I should test it is
not an escape route, if it's untested it shouldn't be made the default
algorithm.

My skimming of the PFLDNet 2007 proceedings showed only the works by
Injong and Doug on Cubic and Injong tested some version on Linux
2.6.13(!) which might noe be the version in the current tree. Doug shows
some weaknesses of the Cubic algorithm as implemented in Linux.

Do you still think that making Cubic the default is a good idea?

Baruch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13  9:56           ` Baruch Even
@ 2007-02-13 16:49             ` SANGTAE HA
  2007-02-13 17:42               ` Baruch Even
  2007-02-13 20:06               ` David Miller
  2007-02-13 17:41             ` Injong Rhee
  2007-02-13 19:56             ` David Miller
  2 siblings, 2 replies; 29+ messages in thread
From: SANGTAE HA @ 2007-02-13 16:49 UTC (permalink / raw)
  To: Baruch Even; +Cc: David Miller, shemminger, netdev

Hi Baruch,

I would like to add some comments on your argument.

On 2/13/07, Baruch Even <baruch@ev-en.org> wrote:
> * David Miller <davem@davemloft.net> [070213 00:53]:
> > From: Baruch Even <baruch@ev-en.org>
> > Date: Tue, 13 Feb 2007 00:12:41 +0200
> >
> > > The problem is that you actually put a mostly untested algorithm as the
> > > default for everyone to use. The BIC example is important, it was the
> > > default algorithm for a long while and had implementation bugs that no
> > > one cared for.
> >
> > And if our TCP Reno implementation had some bugs, what should
> > we change the default to?  This is just idiotic logic.
> >
> > These kinds of comments are just wanking, and lead to nowhere,
> > so please kill the noise.
> >
> > If we have bugs in a particular algorithm, we should just fix
> > them.
>
> I hope you've finished attempting to insult me. But I hope it won't
> prevent you from getting back to the topic. The above quote of me was a
> prelude to show the repeat behaviour where bic was added without
> testing, modified by Stephen and made default with no serious testing of
> what was put in the kernel.


What kind of serious testing you want to? I've been testing all
highspeed protocols including BIC and CUBIC for two and half years
now. Even Stephen didn't test CUBIC algorithm by himself, he might see
the results from our experimental studies. I don't care what algorithm
is default in kernel, however, it is not appropriate to get back to
Reno. As Windows decided to go with "Compound TCP", why we want to
back to 80's algorithm?


>
> It seems this happens again no with cubic. And you failed to respond to
> this issue.
>
> > > The behaviour of cubic wasn't properly verified as the
> > > algorithm in the linux kernel is not the one that was actually proposed
> > > and you intend to make it the default without sufficient testing, that
> > > seems to me to be quite unreasonable.
>
> According to claims of Doug Leith the cubic algorithm that is in the
> kernel is different from what was proposed and tested. That's an
> important issue which is deflected by personal attacks.

Did you read that paper?
http://wil.cs.caltech.edu/pfldnet2007/paper/CUBIC_analysis.pdf
Then, please read the rebuttal for that paper.
http://www.csc.ncsu.edu/faculty/rhee/Rebuttal-LSM-new.pdf

Also, the implementation can be different. The cubic code inside of
current kernel introduces faster calculation of cubic root. Even
though we had some bugs on CUBIC implementation, it is fixed now.


> > My main gripe is that there is a run to make an untested algorithm the
> default for all Linux installations. And saying that I should test it is
> not an escape route, if it's untested it shouldn't be made the default
> algorithm.

What is criteria for "untested"?  Who judges that this algorithm is
fully tested and is ready to use?

Could you tell me some other groups did more testing than ours?

Dummynet Testbed Result
http://netsrv.csc.ncsu.edu/highspeed/
http://netsrv.csc.ncsu.edu/convex-ordering/
http://www.csc.ncsu.edu/faculty/rhee/export/comnet-v3-rhee.pdf

Real testing between Korea and Japan (Seoul-Daejon-Busan-Japan)
http://netsrv.csc.ncsu.edu/highspeed/exp/

We still do testing with latest kernel version on production
networks(4ms, 6ms, 9ms, 45ms, and 200ms). I will post the results when
those are ready.


>
> My skimming of the PFLDNet 2007 proceedings showed only the works by
> Injong and Doug on Cubic and Injong tested some version on Linux
> 2.6.13(!) which might noe be the version in the current tree. Doug shows
> some weaknesses of the Cubic algorithm as implemented in Linux.

As I mentioned, please read the paper and rebuttal carefully. Also, in
PFLDnet 2007, Prof. R. Srikant proposed a new algorithm that uses BIC
and CUBIC curve based on delay estimation even he didn't know about
BIC and CUBIC before. I felt the CUBIC algorithm itself is not a bad
idea as other newly proposed algorithms follow BIC and CUBIC
approaches. I admit all proposed algorithms have their advantages over
others.

> Do you still think that making Cubic the default is a good idea?

Then, what do you want to make a default? You want to get back to BIC? or Reno?

>
> Baruch
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

Sangtae

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13  9:56           ` Baruch Even
  2007-02-13 16:49             ` SANGTAE HA
@ 2007-02-13 17:41             ` Injong Rhee
  2007-02-13 18:23               ` Baruch Even
  2007-02-13 19:56             ` David Miller
  2 siblings, 1 reply; 29+ messages in thread
From: Injong Rhee @ 2007-02-13 17:41 UTC (permalink / raw)
  To: Baruch Even; +Cc: Stephen Hemminger, David Miller, netdev


On Feb 13, 2007, at 4:56 AM, Baruch Even wrote:

>
> According to claims of Doug Leith the cubic algorithm that is in the
> kernel is different from what was proposed and tested. That's an
> important issue which is deflected by personal attacks.

It is not the algorithm "untested" -- it is the implementation not
fully tested. This is exactly the reason we are proposing to build a 
common, convenient,
accessible testbed equipped with a full set of automated testing 
scenarios. This
would be useful to crack out these bugs. There could be a weakness in 
an algorithm, but
there is no bug in the algorithm.

>
> My main gripe is that there is a run to make an untested algorithm the
> default for all Linux installations. And saying that I should test it 
> is
> not an escape route, if it's untested it shouldn't be made the default
> algorithm.
>
> My skimming of the PFLDNet 2007 proceedings showed only the works by
> Injong and Doug on Cubic and Injong tested some version on Linux
> 2.6.13(!) which might noe be the version in the current tree. Doug 
> shows
> some weaknesses of the Cubic algorithm as implemented in Linux.\

That version we tested is patched w/ our implementation of CUBIC. You 
can get
the patch from our site so it is a correct implemenation.
The linux implementation is ever evolving and it is
hard to keep track of everything at an instance. So we are using our 
own version
for our paper publishing. But we also test existing versions of Linux 
implementation.
The version that D. Leith has tested is a version that fixes the 
earlier bug.
Note that having bugs in the implementation does not warrant attacks on
the algorithm itself.

Some "weakness" of CUBIC.. please read my rebuttal
on that paper in my website:  http://www.csc.ncsu.edu/faculty/rhee/
The slow convergence issue of CUBIC has been inherent
feature of CUBIC -- that is a design tradeoff we make when we design
BIC and CUBIC. Depending on the testing environment, CUBIC has
very fast convergence, but only in a very low statistical multiplexing 
environment,
the conversion is slow. WE HAVE ADMITTED THAT SIN.
  So he did not exactly FIND that problem. Also on  the
other issues he just raised..just please read that rebuttal.

I am just sick and tired of hearing all these nonsenses that people 
spray based on some
incidental observation on the behavior of protocols without proper 
comparison
and reasoning why a protocol could behave in that environment being 
tested.


>
> Do you still think that making Cubic the default is a good idea?

Do you think H-TCP could make a good candidate? I remember there are 
bugs in
H-TCP implementation (which went on unnoticed for a long time) -- Leith 
claims
his team found the bugs -- but it seems a little of coincidence that 
after we post
our report on a strange behavior on H-TCP, D. Leith came back saying 
they
found the bugs (no attribution..hmm). We also found some problem
in the weakness of H-TCP algorithm
(not implementation) as well (please read our Convex ordering
paper in PFLDnet07). Based on the same argument of yours, then H-TCP
does not make the cut. I guess none of TCP protocols would have made 
the cut
either.


>
> Baruch
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13 16:49             ` SANGTAE HA
@ 2007-02-13 17:42               ` Baruch Even
  2007-02-13 19:54                 ` John Heffner
  2007-02-13 20:06               ` David Miller
  1 sibling, 1 reply; 29+ messages in thread
From: Baruch Even @ 2007-02-13 17:42 UTC (permalink / raw)
  To: SANGTAE HA; +Cc: David Miller, shemminger, netdev

* SANGTAE HA <sangtae.ha@gmail.com> [070213 18:50]:
> Hi Baruch,
> 
> I would like to add some comments on your argument.
> 
> On 2/13/07, Baruch Even <baruch@ev-en.org> wrote:
> >* David Miller <davem@davemloft.net> [070213 00:53]:
> >> From: Baruch Even <baruch@ev-en.org>
> >> Date: Tue, 13 Feb 2007 00:12:41 +0200
> >>
> >> > The problem is that you actually put a mostly untested algorithm as the
> >> > default for everyone to use. The BIC example is important, it was the
> >> > default algorithm for a long while and had implementation bugs that no
> >> > one cared for.
> >>
> >> And if our TCP Reno implementation had some bugs, what should
> >> we change the default to?  This is just idiotic logic.
> >>
> >> These kinds of comments are just wanking, and lead to nowhere,
> >> so please kill the noise.
> >>
> >> If we have bugs in a particular algorithm, we should just fix
> >> them.
> >
> >I hope you've finished attempting to insult me. But I hope it won't
> >prevent you from getting back to the topic. The above quote of me was a
> >prelude to show the repeat behaviour where bic was added without
> >testing, modified by Stephen and made default with no serious testing of
> >what was put in the kernel.
> 
> 
> What kind of serious testing you want to? I've been testing all
> highspeed protocols including BIC and CUBIC for two and half years
> now. Even Stephen didn't test CUBIC algorithm by himself, he might see
> the results from our experimental studies. I don't care what algorithm
> is default in kernel, however, it is not appropriate to get back to
> Reno. As Windows decided to go with "Compound TCP", why we want to
> back to 80's algorithm?

I fail to see how Microsoft should be the reason for anything, if
anything Linux started the arms race.

> >It seems this happens again no with cubic. And you failed to respond to
> >this issue.
> >
> >> > The behaviour of cubic wasn't properly verified as the
> >> > algorithm in the linux kernel is not the one that was actually proposed
> >> > and you intend to make it the default without sufficient testing, that
> >> > seems to me to be quite unreasonable.
> >
> >According to claims of Doug Leith the cubic algorithm that is in the
> >kernel is different from what was proposed and tested. That's an
> >important issue which is deflected by personal attacks.
> 
> Did you read that paper?
> http://wil.cs.caltech.edu/pfldnet2007/paper/CUBIC_analysis.pdf
> Then, please read the rebuttal for that paper.
> http://www.csc.ncsu.edu/faculty/rhee/Rebuttal-LSM-new.pdf
> 
> Also, the implementation can be different. The cubic code inside of
> current kernel introduces faster calculation of cubic root. Even
> though we had some bugs on CUBIC implementation, it is fixed now.

We have seen before with bic that the different implementation meant
that things didnt work as expected. I wouldn't like it to happen again.

> 
> >> My main gripe is that there is a run to make an untested algorithm the
> >default for all Linux installations. And saying that I should test it is
> >not an escape route, if it's untested it shouldn't be made the default
> >algorithm.
> 
> What is criteria for "untested"?  Who judges that this algorithm is
> fully tested and is ready to use?

Did you do your tests on 2.6.20? Did you verify that the algorithm
actually behaves as it should? I don't think anyone did any real tests
on the cubic version in the kernel and I fear a repeat of the bic issue.
Code that is untested is likely not to work and as far as I understand
it you didn't test the current kernel version but rather your own code
on an ancient kernel.

I'd be happy to be proven wrong and shown tests of cubic in the latest
kernel. Saying that I shuold do it myself if concerned is the wrong way
to go. I no longer have access to test equipment to do that and we
should not make an algorithm the default without sufficient testing.

> We still do testing with latest kernel version on production
> networks(4ms, 6ms, 9ms, 45ms, and 200ms). I will post the results when
> those are ready.

That would be an important step indeed.

> >My skimming of the PFLDNet 2007 proceedings showed only the works by
> >Injong and Doug on Cubic and Injong tested some version on Linux
> >2.6.13(!) which might noe be the version in the current tree. Doug shows
> >some weaknesses of the Cubic algorithm as implemented in Linux.
> 
> As I mentioned, please read the paper and rebuttal carefully. Also, in

I'll do that later on, but quick reading shows web traffic with a
minimum of 137KB, which doesn't seem to be very realistic. But I need to
read deeper to see what goes there.

> >Do you still think that making Cubic the default is a good idea?
> 
> Then, what do you want to make a default? You want to get back to BIC? or Reno?

I don't claim to have all the right answers, I would prefer to go to
Reno and I don't buy the argument of DaveM that we have to use some
fancy high speed algorithm, even if we do go for one I'd prefer a safer
choice like HSTCP.

Baruch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13 17:41             ` Injong Rhee
@ 2007-02-13 18:23               ` Baruch Even
  0 siblings, 0 replies; 29+ messages in thread
From: Baruch Even @ 2007-02-13 18:23 UTC (permalink / raw)
  To: Injong Rhee; +Cc: Stephen Hemminger, David Miller, netdev

* Injong Rhee <rhee@eos.ncsu.edu> [070213 19:43]:
> 
> On Feb 13, 2007, at 4:56 AM, Baruch Even wrote:
> 
> >
> >According to claims of Doug Leith the cubic algorithm that is in the
> >kernel is different from what was proposed and tested. That's an
> >important issue which is deflected by personal attacks.
> 
> It is not the algorithm "untested" -- it is the implementation not
> fully tested. This is exactly the reason we are proposing to build a common, convenient,
> accessible testbed equipped with a full set of automated testing scenarios. This
> would be useful to crack out these bugs. There could be a weakness in an algorithm, but
> there is no bug in the algorithm.

Yes. That was bad terminology on my part.

A testbed would be nice, I've heard several times about ideas to do that
but haven't seen anything materialize yet.

> >Do you still think that making Cubic the default is a good idea?
> 
> Do you think H-TCP could make a good candidate? I remember there are bugs in

I don't. And if you'd bother looking back at the thread you'd see that I
didn't even consider that an option. You automatically assume that I'm
only trying to further H-TCP, far from it. I've finished my MSc already
and am back to being a free man with his own thoughts. I've seen what
happened before and want to prevent that from happening again. I think
that the bic algorithm wasn't good enough and the implementation was
even buggier and still it was made the default without much thought and
no one thought to pull it back.

> H-TCP implementation (which went on unnoticed for a long time) -- Leith claims
> his team found the bugs -- but it seems a little of coincidence that after we post
> our report on a strange behavior on H-TCP, D. Leith came back saying they
> found the bugs (no attribution..hmm).

I'm the one who found the issue and I can assure you that I didn't see
any notice from you before I did that. I was simply migrating my work
from an older kernel with our patches to the latest kernel at the time
with the patches as committed by Stephen. There was a difference between
what was submitted by myself and what was committed and it took us time
to detect that for the same reason I'm worried about the existing cubic
implementation, we were using our own patches and not testing the Linux
implementation. This is the same thing that is happening now with cubic.

> We also found some problem in the weakness of H-TCP algorithm (not
> implementation) as well (please read our Convex ordering paper in
> PFLDnet07). Based on the same argument of yours, then H-TCP does not
> make the cut. I guess none of TCP protocols would have made the cut
> either.

Bingo.

Baruch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13 17:42               ` Baruch Even
@ 2007-02-13 19:54                 ` John Heffner
  0 siblings, 0 replies; 29+ messages in thread
From: John Heffner @ 2007-02-13 19:54 UTC (permalink / raw)
  To: netdev; +Cc: Baruch Even, SANGTAE HA, David Miller, shemminger

This isn't really a reply to anyone in particular, but I wanted to touch 
on a few points.


>> Reno. As Windows decided to go with "Compound TCP", why we want to
>> back to 80's algorithm?

It's worth noting that Microsoft is not using Compound TCP by default, 
except in Beta versions so they can get more experience with it.  It is 
available to turn on in production versions, but Reno is still default. 
  Take this how you will, but that's the current state of affairs.


> I fail to see how Microsoft should be the reason for anything, if
> anything Linux started the arms race.

I'd like to put to bed this notion of an arms race.  A number of people 
have accused Linux and Windows of competing with each other to be more 
aggressive, which is just not the case.  The use of non-standard 
congestion control algorithms is due to a real need to fill underused 
large pipes.  In fact, if Linux or Windows stomped on top of other TCPs 
in production, it would lead to a bad reputation for the one doing the 
stomping, and is something everyone is eager to avoid.  It would be 
easier to design an extremely aggressive control algorithm.  The hard 
work is in achieving the desired properties of fairness, stability, 
etc., in addition to high utilization.

Some care has been taken (okay, with varying success) in designing each 
of the default candidate algorithms to avoid harming standard Reno-style 
flows under "normal" conditions.  If an algorithms meets this 
requirement, then there's almost no reason at this point not to use it. 
  The main issue for the future is dealing with the interaction between 
various (possibly unknown) congestion control algorithms.  From an 
academic point of view, it's very difficult to say anything about how 
they might interact.  At least it's more difficult than modeling how 
flows using a single algorithm interact with each other.  This is 
something of a concern, but we must weigh this against the pressing 
demand for something better than reno.  Further, there's all sorts of 
traffic out there on the Internet with varying responsiveness, as there 
is no enforcement of any particular model of congestion control.  This 
must be taken into account, regardless of what Linux chooses as its 
default at any point in time.

   -John

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13  9:56           ` Baruch Even
  2007-02-13 16:49             ` SANGTAE HA
  2007-02-13 17:41             ` Injong Rhee
@ 2007-02-13 19:56             ` David Miller
  2007-02-13 20:06               ` Baruch Even
  2 siblings, 1 reply; 29+ messages in thread
From: David Miller @ 2007-02-13 19:56 UTC (permalink / raw)
  To: baruch; +Cc: shemminger, netdev

From: Baruch Even <baruch@ev-en.org>
Date: Tue, 13 Feb 2007 11:56:13 +0200

> Do you still think that making Cubic the default is a good idea?

Can you propose a better alternative other than Reno?

You've cited only "unknown unknowns" and that's not something tangible
we can work with.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13 19:56             ` David Miller
@ 2007-02-13 20:06               ` Baruch Even
  0 siblings, 0 replies; 29+ messages in thread
From: Baruch Even @ 2007-02-13 20:06 UTC (permalink / raw)
  To: David Miller; +Cc: shemminger, netdev

* David Miller <davem@davemloft.net> [070213 21:56]:
> From: Baruch Even <baruch@ev-en.org>
> Date: Tue, 13 Feb 2007 11:56:13 +0200
> 
> > Do you still think that making Cubic the default is a good idea?
> 
> Can you propose a better alternative other than Reno?

The only other option would be HS-TCP. It is a very simple extension of
Reno and should be good enough for the current common high BDP
connections. If not that then the choice is to keep on BIC and test the
existing implementation of Cubic before making it the default, if at
all.

I still think that using a default of high-speed algorithm is not the
right thing to do, but I'd rather have a sane default rather than go for
the latest proposal to catch the eye.

Baruch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13 16:49             ` SANGTAE HA
  2007-02-13 17:42               ` Baruch Even
@ 2007-02-13 20:06               ` David Miller
  2007-02-13 20:23                 ` Stephen Hemminger
  1 sibling, 1 reply; 29+ messages in thread
From: David Miller @ 2007-02-13 20:06 UTC (permalink / raw)
  To: sangtae.ha; +Cc: baruch, shemminger, netdev

From: "SANGTAE HA" <sangtae.ha@gmail.com>
Date: Tue, 13 Feb 2007 11:49:47 -0500

> I don't care what algorithm is default in kernel, however, it is not
> appropriate to get back to Reno. As Windows decided to go with
> "Compound TCP", why we want to back to 80's algorithm?

I want to re-emphasize this, in BIG BOLD LETTERS, that going back to
Reno is totally unacceptable recommendation to anyone in the current
internet environment.

In fact, it's a cop out to tell people they should use Reno instead
of one of the modern algorithms that can handle high BDP paths.

> What is criteria for "untested"?  Who judges that this algorithm is
> fully tested and is ready to use?

This is another excellent point.

Right now, in the end, the people who decide this are roughly
Stephen and myself.  So we do need to read the papers (we did),
think about the algorithm, and figure out what's best for Linux.
And that means balancing each possibility with each reasonable
alternative, and Reno is NOT a reasonable alternative.

And right now what's best for Linux, from our perspective, is a
default of CUBIC.

> Then, what do you want to make a default? You want to get back to
> BIC? or Reno?

Baruch has already stated that he recommends people to use Reno.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [patch 3/3] tcp: remove experimental variants from default list
  2007-02-13 20:06               ` David Miller
@ 2007-02-13 20:23                 ` Stephen Hemminger
  0 siblings, 0 replies; 29+ messages in thread
From: Stephen Hemminger @ 2007-02-13 20:23 UTC (permalink / raw)
  To: David Miller; +Cc: sangtae.ha, baruch, netdev

My somewhat biased capsule summary is:

Algorithms:

Reno: Linux never really implemented pure Reno anyway, see
	http://www.cs.helsinki.fi/research/iwtcp/papers/linuxtcp.pdf
   This makes anybody doing pure ns2 based comparisons suspect.
   The problem is Reno rolls off

HSTCP: too aggressive and can be unfair
BIC: not fair to Reno
CUBIC: good fairness but depends on additional traffic to converge faster
HTCP: good fairness but high variation
Vegas: reduces loss but sensitive to delay variation and back channel
Westwood: reduces loss but slow growth on high BDP

Not evaluated enough: Hydra, VENO

The biggest issue with CUBIC (and before that BIC) has been 
bugs with a long mean-time-to-discovery (but MTTR has been fast). 
The others don't seem to get as much attention, perhaps
we should turn a different congestion control algorithm as default
on each -mm release to get people to actually look at the others.

There are some newer congestion control algorithms coming:
TCP Illinois, a newer version of Westwood, TCP-Fusion, Exp-TCP
and maybe Adaptive RENO. Stay tuned.

-- 
Stephen Hemminger <shemminger@linux-foundation.org>

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2007-02-13 20:23 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-12 16:03 [patch 0/3] TCP trivial patches Stephen Hemminger
2007-02-12 16:03 ` [patch 1/3] tcp: cleanup of htcp Stephen Hemminger
2007-02-12 21:14   ` David Miller
2007-02-12 21:28     ` [patch 1/3] tcp: cleanup of htcp (resend) Stephen Hemminger
2007-02-12 21:34       ` David Miller
2007-02-12 16:03 ` [patch 2/3] tcp: use read mostly for CUBIC parameters Stephen Hemminger
2007-02-12 21:15   ` David Miller
2007-02-12 16:03 ` [patch 3/3] tcp: remove experimental variants from default list Stephen Hemminger
2007-02-12 19:11   ` Baruch Even
2007-02-12 20:13     ` Ian McDonald
2007-02-12 20:26       ` Stephen Hemminger
2007-02-12 20:34         ` David Miller
2007-02-12 20:32       ` David Miller
2007-02-12 20:37         ` Stephen Hemminger
2007-02-12 20:47           ` David Miller
2007-02-12 21:05         ` Ian McDonald
2007-02-12 20:20     ` David Miller
2007-02-12 22:12       ` Baruch Even
2007-02-12 22:53         ` David Miller
2007-02-13  9:56           ` Baruch Even
2007-02-13 16:49             ` SANGTAE HA
2007-02-13 17:42               ` Baruch Even
2007-02-13 19:54                 ` John Heffner
2007-02-13 20:06               ` David Miller
2007-02-13 20:23                 ` Stephen Hemminger
2007-02-13 17:41             ` Injong Rhee
2007-02-13 18:23               ` Baruch Even
2007-02-13 19:56             ` David Miller
2007-02-13 20:06               ` Baruch Even

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.