All of lore.kernel.org
 help / color / mirror / Atom feed
* net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing between TCP and DCCP
@ 2010-08-30  5:23   ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev

Dave,

please can you have a look at the attached set from the DCCP test tree; it 
tidies up CCID-2 code and implements sharing of code common to TCP and DCCP.

Patch #1: aligns CCID-2 timestamps with those used by TCP.
Patch #2: removes redundant CCID-2 sk-timer functions.
Patch #3: consolidates RFC3390 initial-window code common to TCP and CCID-2 
	  (please note that this still uses the old numbers which have been
	   updated by RFC 5681: I'll send a separate patch documenting that).
Patch #4: shares the minimum-RTO code between TCP and CCID-2.
Patch #5: shares the minimum-RTO code between TCP and CCID-3.

All patches have been tested to compile independently and are at the top of
git://eden-feed.erg.abdn.ac.uk/dccp_exp (subtree 'dccp').

^ permalink raw reply	[flat|nested] 27+ messages in thread

* net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing between TCP and DCCP
@ 2010-08-30  5:23   ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: dccp

Dave,

please can you have a look at the attached set from the DCCP test tree; it 
tidies up CCID-2 code and implements sharing of code common to TCP and DCCP.

Patch #1: aligns CCID-2 timestamps with those used by TCP.
Patch #2: removes redundant CCID-2 sk-timer functions.
Patch #3: consolidates RFC3390 initial-window code common to TCP and CCID-2 
	  (please note that this still uses the old numbers which have been
	   updated by RFC 5681: I'll send a separate patch documenting that).
Patch #4: shares the minimum-RTO code between TCP and CCID-2.
Patch #5: shares the minimum-RTO code between TCP and CCID-3.

All patches have been tested to compile independently and are at the top of
git://eden-feed.erg.abdn.ac.uk/dccp_exp (subtree 'dccp').

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/5] dccp ccid-2: Use u32 timestamps uniformly
@ 2010-08-30  5:23     ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev, Gerrit Renker

Since CCID-2 is de facto a mini implementation of TCP, it makes sense to share
as much code as possible.

Hence this patch aligns CCID-2 timestamping with TCP timestamping.
This also halves the space consumption (on 64-bit systems).

The necessary include file <net/tcp.h> is already included by way of
net/dccp.h. Redundant includes have been removed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.h |   15 ++++++++++-----
 net/dccp/ccids/ccid2.c |   14 ++++++--------
 2 files changed, 16 insertions(+), 13 deletions(-)

--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
 #ifndef _DCCP_CCID2_H_
 #define _DCCP_CCID2_H_
 
-#include <linux/dccp.h>
 #include <linux/timer.h>
 #include <linux/types.h>
 #include "../ccid.h"
+#include "../dccp.h"
+
+/*
+ * CCID-2 timestamping faces the same issues as TCP timestamping.
+ * Hence we reuse/share as much of the code as possible.
+ */
+#define ccid2_time_stamp	tcp_time_stamp
+
 /* NUMDUPACK parameter from RFC 4341, p. 6 */
 #define NUMDUPACK	3
 
-struct sock;
-
 struct ccid2_seq {
 	u64			ccid2s_seq;
-	unsigned long		ccid2s_sent;
+	u32			ccid2s_sent;
 	int			ccid2s_acked;
 	struct ccid2_seq	*ccid2s_prev;
 	struct ccid2_seq	*ccid2s_next;
@@ -72,7 +77,7 @@ struct ccid2_hc_tx_sock {
 
 	u64			tx_rpseq;
 	int			tx_rpdupack;
-	unsigned long		tx_last_cong;
+	u32			tx_last_cong;
 	u64			tx_high_ack;
 };
 
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,8 +25,6 @@
  */
 #include <linux/slab.h>
 #include "../feat.h"
-#include "../ccid.h"
-#include "../dccp.h"
 #include "ccid2.h"
 
 
@@ -175,7 +173,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 
 	hc->tx_seqh->ccid2s_seq   = dp->dccps_gss;
 	hc->tx_seqh->ccid2s_acked = 0;
-	hc->tx_seqh->ccid2s_sent  = jiffies;
+	hc->tx_seqh->ccid2s_sent  = ccid2_time_stamp;
 
 	next = hc->tx_seqh->ccid2s_next;
 	/* check if we need to alloc more space */
@@ -250,7 +248,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 		struct ccid2_seq *seqp = hc->tx_seqt;
 
 		while (seqp != hc->tx_seqh) {
-			ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
+			ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
 				       (unsigned long long)seqp->ccid2s_seq,
 				       seqp->ccid2s_acked, seqp->ccid2s_sent);
 			seqp = seqp->ccid2s_next;
@@ -431,19 +429,19 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
 	 * The cleanest solution is to not use the ccid2s_sent field at all
 	 * and instead use DCCP timestamps: requires changes in other places.
 	 */
-	ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent);
+	ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
 }
 
 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 {
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 
-	if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) {
+	if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
 		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
 		return;
 	}
 
-	hc->tx_last_cong = jiffies;
+	hc->tx_last_cong = ccid2_time_stamp;
 
 	hc->tx_cwnd      = hc->tx_cwnd / 2 ? : 1U;
 	hc->tx_ssthresh  = max(hc->tx_cwnd, 2U);
@@ -683,7 +681,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 
 	hc->tx_rto	 = DCCP_TIMEOUT_INIT;
 	hc->tx_rpdupack  = -1;
-	hc->tx_last_cong = jiffies;
+	hc->tx_last_cong = ccid2_time_stamp;
 	setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
 			(unsigned long)sk);
 	return 0;

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/5] dccp ccid-2: Use u32 timestamps uniformly
@ 2010-08-30  5:23     ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: dccp

Since CCID-2 is de facto a mini implementation of TCP, it makes sense to share
as much code as possible.

Hence this patch aligns CCID-2 timestamping with TCP timestamping.
This also halves the space consumption (on 64-bit systems).

The necessary include file <net/tcp.h> is already included by way of
net/dccp.h. Redundant includes have been removed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.h |   15 ++++++++++-----
 net/dccp/ccids/ccid2.c |   14 ++++++--------
 2 files changed, 16 insertions(+), 13 deletions(-)

--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
 #ifndef _DCCP_CCID2_H_
 #define _DCCP_CCID2_H_
 
-#include <linux/dccp.h>
 #include <linux/timer.h>
 #include <linux/types.h>
 #include "../ccid.h"
+#include "../dccp.h"
+
+/*
+ * CCID-2 timestamping faces the same issues as TCP timestamping.
+ * Hence we reuse/share as much of the code as possible.
+ */
+#define ccid2_time_stamp	tcp_time_stamp
+
 /* NUMDUPACK parameter from RFC 4341, p. 6 */
 #define NUMDUPACK	3
 
-struct sock;
-
 struct ccid2_seq {
 	u64			ccid2s_seq;
-	unsigned long		ccid2s_sent;
+	u32			ccid2s_sent;
 	int			ccid2s_acked;
 	struct ccid2_seq	*ccid2s_prev;
 	struct ccid2_seq	*ccid2s_next;
@@ -72,7 +77,7 @@ struct ccid2_hc_tx_sock {
 
 	u64			tx_rpseq;
 	int			tx_rpdupack;
-	unsigned long		tx_last_cong;
+	u32			tx_last_cong;
 	u64			tx_high_ack;
 };
 
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,8 +25,6 @@
  */
 #include <linux/slab.h>
 #include "../feat.h"
-#include "../ccid.h"
-#include "../dccp.h"
 #include "ccid2.h"
 
 
@@ -175,7 +173,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 
 	hc->tx_seqh->ccid2s_seq   = dp->dccps_gss;
 	hc->tx_seqh->ccid2s_acked = 0;
-	hc->tx_seqh->ccid2s_sent  = jiffies;
+	hc->tx_seqh->ccid2s_sent  = ccid2_time_stamp;
 
 	next = hc->tx_seqh->ccid2s_next;
 	/* check if we need to alloc more space */
@@ -250,7 +248,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 		struct ccid2_seq *seqp = hc->tx_seqt;
 
 		while (seqp != hc->tx_seqh) {
-			ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
+			ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
 				       (unsigned long long)seqp->ccid2s_seq,
 				       seqp->ccid2s_acked, seqp->ccid2s_sent);
 			seqp = seqp->ccid2s_next;
@@ -431,19 +429,19 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
 	 * The cleanest solution is to not use the ccid2s_sent field at all
 	 * and instead use DCCP timestamps: requires changes in other places.
 	 */
-	ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent);
+	ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
 }
 
 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 {
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 
-	if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) {
+	if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
 		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
 		return;
 	}
 
-	hc->tx_last_cong = jiffies;
+	hc->tx_last_cong = ccid2_time_stamp;
 
 	hc->tx_cwnd      = hc->tx_cwnd / 2 ? : 1U;
 	hc->tx_ssthresh  = max(hc->tx_cwnd, 2U);
@@ -683,7 +681,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 
 	hc->tx_rto	 = DCCP_TIMEOUT_INIT;
 	hc->tx_rpdupack  = -1;
-	hc->tx_last_cong = jiffies;
+	hc->tx_last_cong = ccid2_time_stamp;
 	setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
 			(unsigned long)sk);
 	return 0;

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 2/5] dccp ccid-2: Remove wrappers around sk_{reset,stop}_timer()
@ 2010-08-30  5:23       ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev, Gerrit Renker

This removes the wrappers around the sk timer functions, since not much is
gained from using them: the BUG_ON in start_rto_timer will never trigger
since that function is called only if:

 * the RTO timer expires (rto_expire, and then timer_pending() is false);
 * in tx_packet_sent only if !timer_pending() (BUG_ON is redundant here);
 * previously in new_ack, after stopping the timer (timer_pending() false).

Removing the wrappers also clears the way for eventually replacing the
RTO timer with the icsk-retransmission-timer, as it is already part of the
DCCP socket.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c |   28 +++-------------------------
 1 files changed, 3 insertions(+), 25 deletions(-)

--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -111,8 +111,6 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 	dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_start_rto_timer(struct sock *sk);
-
 static void ccid2_hc_tx_rto_expire(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
@@ -131,7 +129,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	if (hc->tx_rto > DCCP_RTO_MAX)
 		hc->tx_rto = DCCP_RTO_MAX;
 
-	ccid2_start_rto_timer(sk);
+	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 
 	/* adjust pipe, cwnd etc */
 	hc->tx_ssthresh = hc->tx_cwnd / 2;
@@ -153,16 +151,6 @@ out:
 	sock_put(sk);
 }
 
-static void ccid2_start_rto_timer(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	ccid2_pr_debug("setting RTO timeout=%u\n", hc->tx_rto);
-
-	BUG_ON(timer_pending(&hc->tx_rtotimer));
-	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-}
-
 static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -239,9 +227,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	}
 #endif
 
-	/* setup RTO timer */
-	if (!timer_pending(&hc->tx_rtotimer))
-		ccid2_start_rto_timer(sk);
+	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 	do {
@@ -320,14 +306,6 @@ out_invalid_option:
 	return -1;
 }
 
-static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	sk_stop_timer(sk, &hc->tx_rtotimer);
-	ccid2_pr_debug("deleted RTO timer\n");
-}
-
 /**
  * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
  * This code is almost identical with TCP's tcp_rtt_estimator(), since
@@ -692,7 +670,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 	int i;
 
-	ccid2_hc_tx_kill_rto_timer(sk);
+	sk_stop_timer(sk, &hc->tx_rtotimer);
 
 	for (i = 0; i < hc->tx_seqbufc; i++)
 		kfree(hc->tx_seqbuf[i]);

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 2/5] dccp ccid-2: Remove wrappers around sk_{reset,stop}_timer()
@ 2010-08-30  5:23       ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: dccp

This removes the wrappers around the sk timer functions, since not much is
gained from using them: the BUG_ON in start_rto_timer will never trigger
since that function is called only if:

 * the RTO timer expires (rto_expire, and then timer_pending() is false);
 * in tx_packet_sent only if !timer_pending() (BUG_ON is redundant here);
 * previously in new_ack, after stopping the timer (timer_pending() false).

Removing the wrappers also clears the way for eventually replacing the
RTO timer with the icsk-retransmission-timer, as it is already part of the
DCCP socket.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c |   28 +++-------------------------
 1 files changed, 3 insertions(+), 25 deletions(-)

--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -111,8 +111,6 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 	dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_start_rto_timer(struct sock *sk);
-
 static void ccid2_hc_tx_rto_expire(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
@@ -131,7 +129,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	if (hc->tx_rto > DCCP_RTO_MAX)
 		hc->tx_rto = DCCP_RTO_MAX;
 
-	ccid2_start_rto_timer(sk);
+	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 
 	/* adjust pipe, cwnd etc */
 	hc->tx_ssthresh = hc->tx_cwnd / 2;
@@ -153,16 +151,6 @@ out:
 	sock_put(sk);
 }
 
-static void ccid2_start_rto_timer(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	ccid2_pr_debug("setting RTO timeout=%u\n", hc->tx_rto);
-
-	BUG_ON(timer_pending(&hc->tx_rtotimer));
-	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-}
-
 static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -239,9 +227,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	}
 #endif
 
-	/* setup RTO timer */
-	if (!timer_pending(&hc->tx_rtotimer))
-		ccid2_start_rto_timer(sk);
+	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 	do {
@@ -320,14 +306,6 @@ out_invalid_option:
 	return -1;
 }
 
-static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	sk_stop_timer(sk, &hc->tx_rtotimer);
-	ccid2_pr_debug("deleted RTO timer\n");
-}
-
 /**
  * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
  * This code is almost identical with TCP's tcp_rtt_estimator(), since
@@ -692,7 +670,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 	int i;
 
-	ccid2_hc_tx_kill_rto_timer(sk);
+	sk_stop_timer(sk, &hc->tx_rtotimer);
 
 	for (i = 0; i < hc->tx_seqbufc; i++)
 		kfree(hc->tx_seqbuf[i]);

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion
@ 2010-08-30  5:23         ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev, Gerrit Renker

This patch consolidates initial-window code common to TCP and CCID-2:
 * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
 * CCID-2 uses RFC 3390 in a packet-oriented manner (RFC 4341).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/net/tcp.h      |   15 +++++++++++++++
 net/dccp/ccids/ccid2.c |    8 ++------
 net/ipv4/tcp_input.c   |   17 ++---------------
 3 files changed, 19 insertions(+), 21 deletions(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -789,6 +789,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 /* Use define here intentionally to get WARN_ON location shown at the caller */
 #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
 
+/*
+ * Convert RFC 3390 larger initial window into an equivalent number of packets.
+ *
+ * John Heffner states:
+ *
+ *	The RFC specifies a window of no more than 4380 bytes
+ *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
+ *	is a bit misleading because they use a clamp at 4380 bytes
+ *	rather than a multiplier in the relevant range.
+ */
+static inline u32 rfc3390_bytes_to_packets(const u32 smss)
+{
+	return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3);
+}
+
 extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -641,12 +641,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
 	hc->tx_ssthresh = ~0U;
 
-	/*
-	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
-	 * packets for new connections, following the rules from [RFC3390]".
-	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
-	 */
-	hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
+	/* Use larger initial windows (RFC 4341, section 5). */
+	hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
 
 	/* Make sure that Ack Ratio is enabled and within bounds. */
 	max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }
 
-/* Numbers are taken from RFC3390.
- *
- * John Heffner states:
- *
- *	The RFC specifies a window of no more than 4380 bytes
- *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
- *	is a bit misleading because they use a clamp at 4380 bytes
- *	rather than use a multiplier in the relevant range.
- */
 __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
-	if (!cwnd) {
-		if (tp->mss_cache > 1460)
-			cwnd = 2;
-		else
-			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
-	}
+	if (!cwnd)
+		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion
@ 2010-08-30  5:23         ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: dccp

This patch consolidates initial-window code common to TCP and CCID-2:
 * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
 * CCID-2 uses RFC 3390 in a packet-oriented manner (RFC 4341).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/net/tcp.h      |   15 +++++++++++++++
 net/dccp/ccids/ccid2.c |    8 ++------
 net/ipv4/tcp_input.c   |   17 ++---------------
 3 files changed, 19 insertions(+), 21 deletions(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -789,6 +789,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 /* Use define here intentionally to get WARN_ON location shown at the caller */
 #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
 
+/*
+ * Convert RFC 3390 larger initial window into an equivalent number of packets.
+ *
+ * John Heffner states:
+ *
+ *	The RFC specifies a window of no more than 4380 bytes
+ *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
+ *	is a bit misleading because they use a clamp at 4380 bytes
+ *	rather than a multiplier in the relevant range.
+ */
+static inline u32 rfc3390_bytes_to_packets(const u32 smss)
+{
+	return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3);
+}
+
 extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -641,12 +641,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
 	hc->tx_ssthresh = ~0U;
 
-	/*
-	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
-	 * packets for new connections, following the rules from [RFC3390]".
-	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
-	 */
-	hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
+	/* Use larger initial windows (RFC 4341, section 5). */
+	hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
 
 	/* Make sure that Ack Ratio is enabled and within bounds. */
 	max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }
 
-/* Numbers are taken from RFC3390.
- *
- * John Heffner states:
- *
- *	The RFC specifies a window of no more than 4380 bytes
- *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
- *	is a bit misleading because they use a clamp at 4380 bytes
- *	rather than use a multiplier in the relevant range.
- */
 __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
-	if (!cwnd) {
-		if (tp->mss_cache > 1460)
-			cwnd = 2;
-		else
-			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
-	}
+	if (!cwnd)
+		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 4/5] dccp ccid-2: Share TCP's minimum RTO code
@ 2010-08-30  5:23           ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev, Gerrit Renker

Using a fixed RTO_MIN of 0.2 seconds was found to cause problems for CCID-2
over 802.11g: at least once per session there was a spurious timeout. It
helped to then increase the value of RTO_MIN over this link.

Since the problem is the same as in TCP, this patch makes the solution from
commit "05bb1fad1cde025a864a90cfeb98dcbefe78a44a"
       "[TCP]: Allow minimum RTO to be configurable via routing metrics."
available to DCCP.

This avoids reinventing the wheel, so that e.g. the following works in the
expected way now also for CCID-2:

> ip route change 10.0.0.2 rto_min 800 dev ath0

Luckily this useful rto_min function was recently moved to net/tcp.h,
which simplifies sharing code originating from TCP.

Documentation also updated (plus minor whitespace fixes).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c            |    5 +++--
 Documentation/networking/dccp.txt |   26 ++++++++++++++++++++------
 2 files changed, 23 insertions(+), 8 deletions(-)

--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -325,8 +325,9 @@ static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
 		hc->tx_srtt = m << 3;
 		hc->tx_mdev = m << 1;
 
-		hc->tx_mdev_max = max(TCP_RTO_MIN, hc->tx_mdev);
+		hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
 		hc->tx_rttvar   = hc->tx_mdev_max;
+
 		hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
 	} else {
 		/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
@@ -367,7 +368,7 @@ static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
 				hc->tx_rttvar -= (hc->tx_rttvar -
 						  hc->tx_mdev_max) >> 2;
 			hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
-			hc->tx_mdev_max = TCP_RTO_MIN;
+			hc->tx_mdev_max = tcp_rto_min(sk);
 		}
 	}
 
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -1,18 +1,20 @@
 DCCP protocol
-============
+=============
 
 
 Contents
 ========
-
 - Introduction
 - Missing features
 - Socket options
+- Sysctl variables
+- IOCTLs
+- Other tunables
 - Notes
 
+
 Introduction
 ============
-
 Datagram Congestion Control Protocol (DCCP) is an unreliable, connection
 oriented protocol designed to solve issues present in UDP and TCP, particularly
 for real-time and multimedia (streaming) traffic.
@@ -29,9 +31,9 @@ It has a base protocol and pluggable congestion control IDs (CCIDs).
 DCCP is a Proposed Standard (RFC 2026), and the homepage for DCCP as a protocol
 is at http://www.ietf.org/html.charters/dccp-charter.html
 
+
 Missing features
 ================
-
 The Linux DCCP implementation does not currently support all the features that are
 specified in RFCs 4340...42.
 
@@ -45,7 +47,6 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
 
 Socket options
 ==============
-
 DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
 service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
 the socket will fall back to 0 (which means that no meaningful service code
@@ -112,6 +113,7 @@ DCCP_SOCKOPT_CCID_TX_INFO
 On unidirectional connections it is useful to close the unused half-connection
 via shutdown (SHUT_WR or SHUT_RD): this will reduce per-packet processing costs.
 
+
 Sysctl variables
 ================
 Several DCCP default parameters can be managed by the following sysctls
@@ -155,15 +157,27 @@ sync_ratelimit = 125 ms
 	sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
 	of this parameter is milliseconds; a value of 0 disables rate-limiting.
 
+
 IOCTLS
 ======
 FIONREAD
 	Works as in udp(7): returns in the `int' argument pointer the size of
 	the next pending datagram in bytes, or 0 when no datagram is pending.
 
+
+Other tunables
+==============
+Per-route rto_min support
+	CCID-2 supports the RTAX_RTO_MIN per-route setting for the minimum value
+	of the RTO timer. This setting can be modified via the 'rto_min' option
+	of iproute2; for example:
+		> ip route change 10.0.0.0/24   rto_min 250j dev wlan0
+		> ip route add    10.0.0.254/32 rto_min 800j dev wlan0
+		> ip route show dev wlan0
+
+
 Notes
 =====
-
 DCCP does not travel through NAT successfully at present on many boxes. This is
 because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
 support for DCCP has been added.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 4/5] dccp ccid-2: Share TCP's minimum RTO code
@ 2010-08-30  5:23           ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: dccp

Using a fixed RTO_MIN of 0.2 seconds was found to cause problems for CCID-2
over 802.11g: at least once per session there was a spurious timeout. It
helped to then increase the value of RTO_MIN over this link.

Since the problem is the same as in TCP, this patch makes the solution from
commit "05bb1fad1cde025a864a90cfeb98dcbefe78a44a"
       "[TCP]: Allow minimum RTO to be configurable via routing metrics."
available to DCCP.

This avoids reinventing the wheel, so that e.g. the following works in the
expected way now also for CCID-2:

> ip route change 10.0.0.2 rto_min 800 dev ath0

Luckily this useful rto_min function was recently moved to net/tcp.h,
which simplifies sharing code originating from TCP.

Documentation also updated (plus minor whitespace fixes).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c            |    5 +++--
 Documentation/networking/dccp.txt |   26 ++++++++++++++++++++------
 2 files changed, 23 insertions(+), 8 deletions(-)

--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -325,8 +325,9 @@ static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
 		hc->tx_srtt = m << 3;
 		hc->tx_mdev = m << 1;
 
-		hc->tx_mdev_max = max(TCP_RTO_MIN, hc->tx_mdev);
+		hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
 		hc->tx_rttvar   = hc->tx_mdev_max;
+
 		hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
 	} else {
 		/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
@@ -367,7 +368,7 @@ static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
 				hc->tx_rttvar -= (hc->tx_rttvar -
 						  hc->tx_mdev_max) >> 2;
 			hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
-			hc->tx_mdev_max = TCP_RTO_MIN;
+			hc->tx_mdev_max = tcp_rto_min(sk);
 		}
 	}
 
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -1,18 +1,20 @@
 DCCP protocol
-============
+=============
 
 
 Contents
 ========
-
 - Introduction
 - Missing features
 - Socket options
+- Sysctl variables
+- IOCTLs
+- Other tunables
 - Notes
 
+
 Introduction
 ============
-
 Datagram Congestion Control Protocol (DCCP) is an unreliable, connection
 oriented protocol designed to solve issues present in UDP and TCP, particularly
 for real-time and multimedia (streaming) traffic.
@@ -29,9 +31,9 @@ It has a base protocol and pluggable congestion control IDs (CCIDs).
 DCCP is a Proposed Standard (RFC 2026), and the homepage for DCCP as a protocol
 is at http://www.ietf.org/html.charters/dccp-charter.html
 
+
 Missing features
 ================
-
 The Linux DCCP implementation does not currently support all the features that are
 specified in RFCs 4340...42.
 
@@ -45,7 +47,6 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
 
 Socket options
 ==============
-
 DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
 service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
 the socket will fall back to 0 (which means that no meaningful service code
@@ -112,6 +113,7 @@ DCCP_SOCKOPT_CCID_TX_INFO
 On unidirectional connections it is useful to close the unused half-connection
 via shutdown (SHUT_WR or SHUT_RD): this will reduce per-packet processing costs.
 
+
 Sysctl variables
 ================
 Several DCCP default parameters can be managed by the following sysctls
@@ -155,15 +157,27 @@ sync_ratelimit = 125 ms
 	sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
 	of this parameter is milliseconds; a value of 0 disables rate-limiting.
 
+
 IOCTLS
 ======
 FIONREAD
 	Works as in udp(7): returns in the `int' argument pointer the size of
 	the next pending datagram in bytes, or 0 when no datagram is pending.
 
+
+Other tunables
+==============
+Per-route rto_min support
+	CCID-2 supports the RTAX_RTO_MIN per-route setting for the minimum value
+	of the RTO timer. This setting can be modified via the 'rto_min' option
+	of iproute2; for example:
+		> ip route change 10.0.0.0/24   rto_min 250j dev wlan0
+		> ip route add    10.0.0.254/32 rto_min 800j dev wlan0
+		> ip route show dev wlan0
+
+
 Notes
 =====
-
 DCCP does not travel through NAT successfully at present on many boxes. This is
 because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
 support for DCCP has been added.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 5/5] dccp ccid-3: use per-route RTO or TCP RTO as fallback
@ 2010-08-30  5:23             ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev, Gerrit Renker

This makes RTAX_RTO_MIN also available to CCID-3, replacing the compile-time
RTO lower bound with a per-route tunable value.

The original Kconfig option solved the problem that a very low RTT (in the
order of HZ) can trigger too frequent and unnecessary reductions of the
sending rate.

This tunable does not affect the initial RTO value of 2 seconds specified in
RFC 5348, section 4.2 and Appendix B. But like the hardcoded Kconfig value,
it allows to adapt to network conditions.

The same effect as the original Kconfig option of 100ms is now achieved by

> ip route replace to unicast 192.168.0.0/24 rto_min 100j dev eth0

(assuming HZ=1000).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/Kconfig            |   31 -------------------------------
 net/dccp/ccids/ccid3.c            |   11 +++++------
 net/dccp/ccids/ccid3.h            |    2 +-
 Documentation/networking/dccp.txt |    3 +++
 4 files changed, 9 insertions(+), 38 deletions(-)

--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
 
 	  If in doubt, say N.
 
-config IP_DCCP_CCID3_RTO
-	  int "Use higher bound for nofeedback timer"
-	  default 100
-	  depends on IP_DCCP_CCID3 && EXPERIMENTAL
-	  ---help---
-	    Use higher lower bound for nofeedback timer expiration.
-
-	    The TFRC nofeedback timer normally expires after the maximum of 4
-	    RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
-	    with a small RTT this can mean a high processing load and reduced
-	    performance, since then the nofeedback timer is triggered very
-	    frequently.
-
-	    This option enables to set a higher lower bound for the nofeedback
-	    value. Values in units of milliseconds can be set here.
-
-	    A value of 0 disables this feature by enforcing the value specified
-	    in RFC 3448. The following values have been suggested as bounds for
-	    experimental use:
-		* 16-20ms to match the typical multimedia inter-frame interval
-		* 100ms as a reasonable compromise [default]
-		* 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
-
-	    The default of 100ms is a compromise between a large value for
-	    efficient DCCP implementations, and a small value to avoid disrupting
-	    the network in times of congestion.
-
-	    The purpose of the nofeedback timer is to slow DCCP down when there
-	    is serious network congestion: experimenting with larger values should
-	    therefore not be performed on WANs.
-
 config IP_DCCP_TFRC_LIB
 	def_bool y if IP_DCCP_CCID3
 
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -460,13 +460,12 @@ done_computing_x:
 	sk->sk_write_space(sk);
 
 	/*
-	 * Update timeout interval for the nofeedback timer.
-	 * We use a configuration option to increase the lower bound.
-	 * This can help avoid triggering the nofeedback timer too
-	 * often ('spinning') on LANs with small RTTs.
+	 * Update timeout interval for the nofeedback timer. In order to control
+	 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
+	 * tunable RTAX_RTO_MIN value as the lower bound.
 	 */
-	hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO *
-						       (USEC_PER_SEC / 1000)));
+	hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
+				  USEC_PER_SEC/HZ * tcp_rto_min(sk));
 	/*
 	 * Schedule no feedback timer to expire in
 	 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,7 +42,7 @@
 #include "lib/tfrc.h"
 #include "../ccid.h"
 
-/* Two seconds as per RFC 3448 4.2 */
+/* Two seconds as per RFC 5348, 4.2 */
 #define TFRC_INITIAL_TIMEOUT	   (2 * USEC_PER_SEC)
 
 /* In usecs - half the scheduling granularity as per RFC3448 4.6 */
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -174,6 +174,9 @@ Per-route rto_min support
 		> ip route change 10.0.0.0/24   rto_min 250j dev wlan0
 		> ip route add    10.0.0.254/32 rto_min 800j dev wlan0
 		> ip route show dev wlan0
+	CCID-3 also supports the rto_min setting: it is used to define the lower
+	bound for the expiry of the nofeedback timer. This can be useful on LANs
+	with very low RTTs (e.g., loopback, Gbit ethernet).
 
 
 Notes

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 5/5] dccp ccid-3: use per-route RTO or TCP RTO as fallback
@ 2010-08-30  5:23             ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-08-30  5:23 UTC (permalink / raw)
  To: dccp

This makes RTAX_RTO_MIN also available to CCID-3, replacing the compile-time
RTO lower bound with a per-route tunable value.

The original Kconfig option solved the problem that a very low RTT (in the
order of HZ) can trigger too frequent and unnecessary reductions of the
sending rate.

This tunable does not affect the initial RTO value of 2 seconds specified in
RFC 5348, section 4.2 and Appendix B. But like the hardcoded Kconfig value,
it allows to adapt to network conditions.

The same effect as the original Kconfig option of 100ms is now achieved by

> ip route replace to unicast 192.168.0.0/24 rto_min 100j dev eth0

(assuming HZ=1000).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/Kconfig            |   31 -------------------------------
 net/dccp/ccids/ccid3.c            |   11 +++++------
 net/dccp/ccids/ccid3.h            |    2 +-
 Documentation/networking/dccp.txt |    3 +++
 4 files changed, 9 insertions(+), 38 deletions(-)

--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
 
 	  If in doubt, say N.
 
-config IP_DCCP_CCID3_RTO
-	  int "Use higher bound for nofeedback timer"
-	  default 100
-	  depends on IP_DCCP_CCID3 && EXPERIMENTAL
-	  ---help---
-	    Use higher lower bound for nofeedback timer expiration.
-
-	    The TFRC nofeedback timer normally expires after the maximum of 4
-	    RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
-	    with a small RTT this can mean a high processing load and reduced
-	    performance, since then the nofeedback timer is triggered very
-	    frequently.
-
-	    This option enables to set a higher lower bound for the nofeedback
-	    value. Values in units of milliseconds can be set here.
-
-	    A value of 0 disables this feature by enforcing the value specified
-	    in RFC 3448. The following values have been suggested as bounds for
-	    experimental use:
-		* 16-20ms to match the typical multimedia inter-frame interval
-		* 100ms as a reasonable compromise [default]
-		* 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
-
-	    The default of 100ms is a compromise between a large value for
-	    efficient DCCP implementations, and a small value to avoid disrupting
-	    the network in times of congestion.
-
-	    The purpose of the nofeedback timer is to slow DCCP down when there
-	    is serious network congestion: experimenting with larger values should
-	    therefore not be performed on WANs.
-
 config IP_DCCP_TFRC_LIB
 	def_bool y if IP_DCCP_CCID3
 
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -460,13 +460,12 @@ done_computing_x:
 	sk->sk_write_space(sk);
 
 	/*
-	 * Update timeout interval for the nofeedback timer.
-	 * We use a configuration option to increase the lower bound.
-	 * This can help avoid triggering the nofeedback timer too
-	 * often ('spinning') on LANs with small RTTs.
+	 * Update timeout interval for the nofeedback timer. In order to control
+	 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
+	 * tunable RTAX_RTO_MIN value as the lower bound.
 	 */
-	hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO *
-						       (USEC_PER_SEC / 1000)));
+	hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
+				  USEC_PER_SEC/HZ * tcp_rto_min(sk));
 	/*
 	 * Schedule no feedback timer to expire in
 	 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,7 +42,7 @@
 #include "lib/tfrc.h"
 #include "../ccid.h"
 
-/* Two seconds as per RFC 3448 4.2 */
+/* Two seconds as per RFC 5348, 4.2 */
 #define TFRC_INITIAL_TIMEOUT	   (2 * USEC_PER_SEC)
 
 /* In usecs - half the scheduling granularity as per RFC3448 4.6 */
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -174,6 +174,9 @@ Per-route rto_min support
 		> ip route change 10.0.0.0/24   rto_min 250j dev wlan0
 		> ip route add    10.0.0.254/32 rto_min 800j dev wlan0
 		> ip route show dev wlan0
+	CCID-3 also supports the rto_min setting: it is used to define the lower
+	bound for the expiry of the nofeedback timer. This can be useful on LANs
+	with very low RTTs (e.g., loopback, Gbit ethernet).
 
 
 Notes

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion
  2010-08-30  5:23         ` Gerrit Renker
@ 2010-08-30 12:59           ` Ilpo Järvinen
  -1 siblings, 0 replies; 27+ messages in thread
From: Ilpo Järvinen @ 2010-08-30 12:59 UTC (permalink / raw)
  To: Gerrit Renker; +Cc: David Miller, dccp, Netdev

On Mon, 30 Aug 2010, Gerrit Renker wrote:

> This patch consolidates initial-window code common to TCP and CCID-2:
>  * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
>  * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341).
> 
> Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
> ---
>  include/net/tcp.h      |   15 +++++++++++++++
>  net/dccp/ccids/ccid2.c |    8 ++------
>  net/ipv4/tcp_input.c   |   17 ++---------------
>  3 files changed, 19 insertions(+), 21 deletions(-)
> 
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -789,6 +789,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
>  /* Use define here intentionally to get WARN_ON location shown at the caller */
>  #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
>  
> +/*
> + * Convert RFC 3390 larger initial window into an equivalent number of packets.
> + *
> + * John Heffner states:
> + *
> + *	The RFC specifies a window of no more than 4380 bytes
> + *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
> + *	is a bit misleading because they use a clamp at 4380 bytes
> + *	rather than a multiplier in the relevant range.
> + */
> +static inline u32 rfc3390_bytes_to_packets(const u32 smss)
> +{
> +	return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3);
> +}
> +
>  extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
>  extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
>  
> --- a/net/dccp/ccids/ccid2.c
> +++ b/net/dccp/ccids/ccid2.c
> @@ -641,12 +641,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
>  	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
>  	hc->tx_ssthresh = ~0U;
>  
> -	/*
> -	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
> -	 * packets for new connections, following the rules from [RFC3390]".
> -	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
> -	 */
> -	hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
> +	/* Use larger initial windows (RFC 4341, section 5). */
> +	hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
>  
>  	/* Make sure that Ack Ratio is enabled and within bounds. */
>  	max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk)
>  	}
>  }
>  
> -/* Numbers are taken from RFC3390.
> - *
> - * John Heffner states:
> - *
> - *	The RFC specifies a window of no more than 4380 bytes
> - *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
> - *	is a bit misleading because they use a clamp at 4380 bytes
> - *	rather than use a multiplier in the relevant range.
> - */
>  __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
>  {
>  	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
>  
> -	if (!cwnd) {
> -		if (tp->mss_cache > 1460)
> -			cwnd = 2;
> -		else
> -			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
> -	}
> +	if (!cwnd)
> +		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
>  	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
>  }

What is spelled out in tcp_select_initial_window might also need to follow 
the same logic?

-- 
 i.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390
@ 2010-08-30 12:59           ` Ilpo Järvinen
  0 siblings, 0 replies; 27+ messages in thread
From: Ilpo Järvinen @ 2010-08-30 12:59 UTC (permalink / raw)
  To: dccp

On Mon, 30 Aug 2010, Gerrit Renker wrote:

> This patch consolidates initial-window code common to TCP and CCID-2:
>  * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
>  * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341).
> 
> Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
> ---
>  include/net/tcp.h      |   15 +++++++++++++++
>  net/dccp/ccids/ccid2.c |    8 ++------
>  net/ipv4/tcp_input.c   |   17 ++---------------
>  3 files changed, 19 insertions(+), 21 deletions(-)
> 
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -789,6 +789,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
>  /* Use define here intentionally to get WARN_ON location shown at the caller */
>  #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
>  
> +/*
> + * Convert RFC 3390 larger initial window into an equivalent number of packets.
> + *
> + * John Heffner states:
> + *
> + *	The RFC specifies a window of no more than 4380 bytes
> + *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
> + *	is a bit misleading because they use a clamp at 4380 bytes
> + *	rather than a multiplier in the relevant range.
> + */
> +static inline u32 rfc3390_bytes_to_packets(const u32 smss)
> +{
> +	return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3);
> +}
> +
>  extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
>  extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
>  
> --- a/net/dccp/ccids/ccid2.c
> +++ b/net/dccp/ccids/ccid2.c
> @@ -641,12 +641,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
>  	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
>  	hc->tx_ssthresh = ~0U;
>  
> -	/*
> -	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
> -	 * packets for new connections, following the rules from [RFC3390]".
> -	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
> -	 */
> -	hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
> +	/* Use larger initial windows (RFC 4341, section 5). */
> +	hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
>  
>  	/* Make sure that Ack Ratio is enabled and within bounds. */
>  	max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk)
>  	}
>  }
>  
> -/* Numbers are taken from RFC3390.
> - *
> - * John Heffner states:
> - *
> - *	The RFC specifies a window of no more than 4380 bytes
> - *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
> - *	is a bit misleading because they use a clamp at 4380 bytes
> - *	rather than use a multiplier in the relevant range.
> - */
>  __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
>  {
>  	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
>  
> -	if (!cwnd) {
> -		if (tp->mss_cache > 1460)
> -			cwnd = 2;
> -		else
> -			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
> -	}
> +	if (!cwnd)
> +		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
>  	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
>  }

What is spelled out in tcp_select_initial_window might also need to follow 
the same logic?

-- 
 i.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing between TCP and DCCP
  2010-08-30  5:23   ` Gerrit Renker
@ 2010-08-30 20:46     ` David Miller
  -1 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2010-08-30 20:46 UTC (permalink / raw)
  To: gerrit; +Cc: dccp, netdev

From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 30 Aug 2010 07:23:09 +0200

> All patches have been tested to compile independently and are at the top of
> git://eden-feed.erg.abdn.ac.uk/dccp_exp (subtree 'dccp').

What was this 'dccp' branch based upon?  I tried to pull it into
net-next-2.6 and got lots of unrelated changes and several
conflicts.

If you want me to pull your GIT tree it has to be clean,
against the proper tree, and I should only ever get your
specific changes and absolutely nothing unrelated.

I've applied the 5 patches by hand instead, they all
look fine, thanks!

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing
@ 2010-08-30 20:46     ` David Miller
  0 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2010-08-30 20:46 UTC (permalink / raw)
  To: dccp

From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 30 Aug 2010 07:23:09 +0200

> All patches have been tested to compile independently and are at the top of
> git://eden-feed.erg.abdn.ac.uk/dccp_exp (subtree 'dccp').

What was this 'dccp' branch based upon?  I tried to pull it into
net-next-2.6 and got lots of unrelated changes and several
conflicts.

If you want me to pull your GIT tree it has to be clean,
against the proper tree, and I should only ever get your
specific changes and absolutely nothing unrelated.

I've applied the 5 patches by hand instead, they all
look fine, thanks!

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing between TCP and DCCP
  2010-08-30  5:23   ` Gerrit Renker
@ 2010-08-31 10:38       ` gerrit
  -1 siblings, 0 replies; 27+ messages in thread
From: gerrit @ 2010-08-31 10:38 UTC (permalink / raw)
  To: David Miller; +Cc: gerrit, dccp, netdev

>> All patches have been tested to compile independently and are at the top
>> of  git://eden-feed.erg.abdn.ac.uk/dccp_exp (subtree 'dccp').
>
> What was this 'dccp' branch based upon?  I tried to pull it into
> net-next-2.6 and got lots of unrelated changes and several
> conflicts.
>
I am sorry, I did not expect you to pull, and should only have added the
http url for viewing. The dccp tree is based on netdev-2.6.

For any further submissions I will set up a separate, clean net-next-2.6
with just the submitted patches in it.

Thank you for your patience.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up;
@ 2010-08-31 10:38       ` gerrit
  0 siblings, 0 replies; 27+ messages in thread
From: gerrit @ 2010-08-31 10:38 UTC (permalink / raw)
  To: dccp

>> All patches have been tested to compile independently and are at the top
>> of  git://eden-feed.erg.abdn.ac.uk/dccp_exp (subtree 'dccp').
>
> What was this 'dccp' branch based upon?  I tried to pull it into
> net-next-2.6 and got lots of unrelated changes and several
> conflicts.
>
I am sorry, I did not expect you to pull, and should only have added the
http url for viewing. The dccp tree is based on netdev-2.6.

For any further submissions I will set up a separate, clean net-next-2.6
with just the submitted patches in it.

Thank you for your patience.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion
  2010-08-30  5:23         ` Gerrit Renker
@ 2010-09-01  5:23             ` Gerrit Renker
  -1 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-09-01  5:23 UTC (permalink / raw)
  To: Ilpo Järvinen; +Cc: David Miller, dccp, Netdev

| > This patch consolidates initial-window code common to TCP and CCID-2:
| >  * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
| >  * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341).
...
| > +static inline u32 rfc3390_bytes_to_packets(const u32 smss)
| > +{
| > +	return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3);
| > +}
| > +
...
| 
| What is spelled out in tcp_select_initial_window might also need to follow 
| the same logic?
| 
Thank you for paying attention, can you please have a look at the subsequent
patch. I went through this yesterday and found that in 4 different places 
there are 4 different interpretations of what RFC 3390 means.

Having asked someone more familiar with IETF issues, it seems that the RFC 5681
interpretation of RFC 3390 is now the 'official' one.

+------------+--------------------+--------------------+--------------------+--------------------+
| Condition  |     RFC 2414       |        Linux       |       Linux        |     RFC 5681       |
|            |     RFC 3390       |      tcp_input     |     tcp_output     |                    |
+------------+--------------------+--------------------+--------------------+--------------------+
| 2 segments | 2190 < MSS         | 1460 < MSS         | 4380 < MSS         | 2190 < MSS         |
| 3 segments | 1095 < MSS <= 1460 | 1095 < MSS <= 1460 | 1460 < MSS <= 4380 | 1095 < MSS <= 2190 |
| 4 segments |        MSS <= 1095 |        MSS <= 1095 |        MSS <= 1460 |        MSS <= 1095 |
+------------+--------------------+--------------------+--------------------+--------------------+

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390
@ 2010-09-01  5:23             ` Gerrit Renker
  0 siblings, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-09-01  5:23 UTC (permalink / raw)
  To: dccp

| > This patch consolidates initial-window code common to TCP and CCID-2:
| >  * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
| >  * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341).
...
| > +static inline u32 rfc3390_bytes_to_packets(const u32 smss)
| > +{
| > +	return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3);
| > +}
| > +
...
| 
| What is spelled out in tcp_select_initial_window might also need to follow 
| the same logic?
| 
Thank you for paying attention, can you please have a look at the subsequent
patch. I went through this yesterday and found that in 4 different places 
there are 4 different interpretations of what RFC 3390 means.

Having asked someone more familiar with IETF issues, it seems that the RFC 5681
interpretation of RFC 3390 is now the 'official' one.

+------------+--------------------+--------------------+--------------------+--------------------+
| Condition  |     RFC 2414       |        Linux       |       Linux        |     RFC 5681       |
|            |     RFC 3390       |      tcp_input     |     tcp_output     |                    |
+------------+--------------------+--------------------+--------------------+--------------------+
| 2 segments | 2190 < MSS         | 1460 < MSS         | 4380 < MSS         | 2190 < MSS         |
| 3 segments | 1095 < MSS <= 1460 | 1095 < MSS <= 1460 | 1460 < MSS <= 4380 | 1095 < MSS <= 2190 |
| 4 segments |        MSS <= 1095 |        MSS <= 1095 |        MSS <= 1460 |        MSS <= 1095 |
+------------+--------------------+--------------------+--------------------+--------------------+

^ permalink raw reply	[flat|nested] 27+ messages in thread

* net-next-2.6 [PATCH 1/1] tcp: add missing initial window (RFC 3390) for tcp_output
  2010-09-01  5:23             ` [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 Gerrit Renker
  (?)
@ 2010-09-01  5:34             ` Gerrit Renker
  2010-09-01  7:06               ` Alexander Zimmermann
  -1 siblings, 1 reply; 27+ messages in thread
From: Gerrit Renker @ 2010-09-01  5:34 UTC (permalink / raw)
  To: Ilpo Järvinen, David Miller, Netdev

Please can you have a look, I have checked it, appears to be correct
and compiles cleanly. Thanks again to Ilpo.

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Patch <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
tcp: update also tcp_output with regard to RFC 5681

Thanks to Ilpo Jarvinen, this also updates the initial window
setting for tcp_output with regard to RFC 5681.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/ipv4/tcp_output.c |   10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -225,15 +225,11 @@ void tcp_select_initial_window(int __spa
 	}
 
 	/* Set initial window to value enough for senders,
-	 * following RFC2414. Senders, not following this RFC,
-	 * will be satisfied with 2.
+	 * following RFC5681 (which updates RFC3390).
 	 */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = 4;
-		if (mss > 1460 * 3)
-			init_cwnd = 2;
-		else if (mss > 1460)
-			init_cwnd = 3;
+		int init_cwnd = rfc3390_bytes_to_packets(mss);
+
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion
  2010-08-30  5:23         ` Gerrit Renker
@ 2010-09-01  7:05               ` Alexander Zimmermann
  -1 siblings, 0 replies; 27+ messages in thread
From: Alexander Zimmermann @ 2010-09-01  7:05 UTC (permalink / raw)
  To: Gerrit Renker; +Cc: Ilpo Järvinen, David Miller, dccp, Netdev

[-- Attachment #1: Type: text/plain, Size: 2606 bytes --]

Hi Gerrit,

Am 01.09.2010 um 07:23 schrieb Gerrit Renker:

> | > This patch consolidates initial-window code common to TCP and CCID-2:
> | >  * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
> | >  * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341).
> ...
> | > +static inline u32 rfc3390_bytes_to_packets(const u32 smss)
> | > +{
> | > +	return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3);
> | > +}
> | > +
> ...
> | 
> | What is spelled out in tcp_select_initial_window might also need to follow 
> | the same logic?
> | 
> Thank you for paying attention, can you please have a look at the subsequent
> patch. I went through this yesterday and found that in 4 different places 
> there are 4 different interpretations of what RFC 3390 means.
> 
> Having asked someone more familiar with IETF issues, it seems that the RFC 5681
> interpretation of RFC 3390 is now the 'official' one.
> 
> +------------+--------------------+--------------------+--------------------+--------------------+
> | Condition  |     RFC 2414       |        Linux       |       Linux        |     RFC 5681       |
> |            |     RFC 3390       |      tcp_input     |     tcp_output     |                    |
> +------------+--------------------+--------------------+--------------------+--------------------+
> | 2 segments | 2190 < MSS         | 1460 < MSS         | 4380 < MSS         | 2190 < MSS         |
> | 3 segments | 1095 < MSS <= 1460 | 1095 < MSS <= 1460 | 1460 < MSS <= 4380 | 1095 < MSS <= 2190 |

                              ^^^^^

Are you sure about this?

RFC3390 says (page 2):
 If (1095 bytes < MSS < 2190 bytes) then win <= 4380;

So, IMO it should be 1095 < MSS < 2190 for 3 segments, and 2190 <= MSS for 2 segments
in the "RFC 3390" column.

BTW: I think that we should not reference RFC2414, since RFC3390 obsoletes it.

Alex


> | 4 segments |        MSS <= 1095 |        MSS <= 1095 |        MSS <= 1460 |        MSS <= 1095 |
> +------------+--------------------+--------------------+--------------------+--------------------+
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

//
// Dipl.-Inform. Alexander Zimmermann
// Department of Computer Science, Informatik 4
// RWTH Aachen University
// Ahornstr. 55, 52056 Aachen, Germany
// phone: (49-241) 80-21422, fax: (49-241) 80-22221
// email: zimmermann@cs.rwth-aachen.de
// web: http://www.umic-mesh.net
//


[-- Attachment #2: Signierter Teil der Nachricht --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390
@ 2010-09-01  7:05               ` Alexander Zimmermann
  0 siblings, 0 replies; 27+ messages in thread
From: Alexander Zimmermann @ 2010-09-01  7:05 UTC (permalink / raw)
  To: dccp

[-- Attachment #1: Type: text/plain, Size: 2606 bytes --]

Hi Gerrit,

Am 01.09.2010 um 07:23 schrieb Gerrit Renker:

> | > This patch consolidates initial-window code common to TCP and CCID-2:
> | >  * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
> | >  * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341).
> ...
> | > +static inline u32 rfc3390_bytes_to_packets(const u32 smss)
> | > +{
> | > +	return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3);
> | > +}
> | > +
> ...
> | 
> | What is spelled out in tcp_select_initial_window might also need to follow 
> | the same logic?
> | 
> Thank you for paying attention, can you please have a look at the subsequent
> patch. I went through this yesterday and found that in 4 different places 
> there are 4 different interpretations of what RFC 3390 means.
> 
> Having asked someone more familiar with IETF issues, it seems that the RFC 5681
> interpretation of RFC 3390 is now the 'official' one.
> 
> +------------+--------------------+--------------------+--------------------+--------------------+
> | Condition  |     RFC 2414       |        Linux       |       Linux        |     RFC 5681       |
> |            |     RFC 3390       |      tcp_input     |     tcp_output     |                    |
> +------------+--------------------+--------------------+--------------------+--------------------+
> | 2 segments | 2190 < MSS         | 1460 < MSS         | 4380 < MSS         | 2190 < MSS         |
> | 3 segments | 1095 < MSS <= 1460 | 1095 < MSS <= 1460 | 1460 < MSS <= 4380 | 1095 < MSS <= 2190 |

                              ^^^^^

Are you sure about this?

RFC3390 says (page 2):
 If (1095 bytes < MSS < 2190 bytes) then win <= 4380;

So, IMO it should be 1095 < MSS < 2190 for 3 segments, and 2190 <= MSS for 2 segments
in the "RFC 3390" column.

BTW: I think that we should not reference RFC2414, since RFC3390 obsoletes it.

Alex


> | 4 segments |        MSS <= 1095 |        MSS <= 1095 |        MSS <= 1460 |        MSS <= 1095 |
> +------------+--------------------+--------------------+--------------------+--------------------+
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

//
// Dipl.-Inform. Alexander Zimmermann
// Department of Computer Science, Informatik 4
// RWTH Aachen University
// Ahornstr. 55, 52056 Aachen, Germany
// phone: (49-241) 80-21422, fax: (49-241) 80-22221
// email: zimmermann@cs.rwth-aachen.de
// web: http://www.umic-mesh.net
//


[-- Attachment #2: Signierter Teil der Nachricht --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: net-next-2.6 [PATCH 1/1] tcp: add missing initial window (RFC 3390) for tcp_output
  2010-09-01  5:34             ` net-next-2.6 [PATCH 1/1] tcp: add missing initial window (RFC 3390) for tcp_output Gerrit Renker
@ 2010-09-01  7:06               ` Alexander Zimmermann
  2010-09-01 10:16                 ` Gerrit Renker
  2010-09-01 10:28                 ` Gerrit Renker
  0 siblings, 2 replies; 27+ messages in thread
From: Alexander Zimmermann @ 2010-09-01  7:06 UTC (permalink / raw)
  To: Gerrit Renker; +Cc: Ilpo Järvinen, David Miller, Netdev

[-- Attachment #1: Type: text/plain, Size: 1766 bytes --]

Hi Gerrit,

Am 01.09.2010 um 07:34 schrieb Gerrit Renker:

> Please can you have a look, I have checked it, appears to be correct
> and compiles cleanly. Thanks again to Ilpo.
> 
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Patch <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
> tcp: update also tcp_output with regard to RFC 5681
> 
> Thanks to Ilpo Jarvinen, this updates also the initial window
> setting for tcp_output with regard to RFC 5681.
> 
> Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
> ---
> net/ipv4/tcp_output.c |   10 +++-------
> 1 file changed, 3 insertions(+), 7 deletions(-)
> 
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -225,15 +225,11 @@ void tcp_select_initial_window(int __spa
> 	}
> 
> 	/* Set initial window to value enough for senders,
> -	 * following RFC2414. Senders, not following this RFC,
> -	 * will be satisfied with 2.
> +	 * following RFC5681 (which updates RFC3390).

In IETF sense, RFC5681 doesn't update RFC3390.

Alex


> 	 */
> 	if (mss > (1 << *rcv_wscale)) {
> -		int init_cwnd = 4;
> -		if (mss > 1460 * 3)
> -			init_cwnd = 2;
> -		else if (mss > 1460)
> -			init_cwnd = 3;
> +		int init_cwnd = rfc3390_bytes_to_packets(mss);
> +
> 		/* when initializing use the value from init_rcv_wnd
> 		 * rather than the default from above
> 		 */
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

//
// Dipl.-Inform. Alexander Zimmermann
// Department of Computer Science, Informatik 4
// RWTH Aachen University
// Ahornstr. 55, 52056 Aachen, Germany
// phone: (49-241) 80-21422, fax: (49-241) 80-22221
// email: zimmermann@cs.rwth-aachen.de
// web: http://www.umic-mesh.net
//


[-- Attachment #2: Signierter Teil der Nachricht --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: net-next-2.6 [PATCH 1/1] tcp: add missing initial window (RFC 3390) for tcp_output
  2010-09-01  7:06               ` Alexander Zimmermann
@ 2010-09-01 10:16                 ` Gerrit Renker
  2010-09-01 10:28                 ` Gerrit Renker
  1 sibling, 0 replies; 27+ messages in thread
From: Gerrit Renker @ 2010-09-01 10:16 UTC (permalink / raw)
  To: Alexander Zimmermann; +Cc: Ilpo Järvinen, David Miller, Netdev

|  > --- a/net/ipv4/tcp_output.c
| > +++ b/net/ipv4/tcp_output.c
| > @@ -225,15 +225,11 @@ void tcp_select_initial_window(int __spa
| > 	}
| > 
| > 	/* Set initial window to value enough for senders,
| > -	 * following RFC2414. Senders, not following this RFC,
| > -	 * will be satisfied with 2.
| > +	 * following RFC5681 (which updates RFC3390).
| 
| In IETF sense, RFC5681 doesn't update RFC3390.
| 
RFC 3390 updates RFC 2581, and RFC 5681 replaces RFC 2581.
That was too long to write.

I will remove the comment.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: net-next-2.6 [PATCH 1/1] tcp: add missing initial window (RFC 3390) for tcp_output
  2010-09-01  7:06               ` Alexander Zimmermann
  2010-09-01 10:16                 ` Gerrit Renker
@ 2010-09-01 10:28                 ` Gerrit Renker
  2010-09-02  1:18                   ` David Miller
  1 sibling, 1 reply; 27+ messages in thread
From: Gerrit Renker @ 2010-09-01 10:28 UTC (permalink / raw)
  To: Ilpo Järvinen; +Cc: David Miller, Netdev

Addressing comments from Alexander, removing comment.

>>>>>>>>>>>>>>>>>>>>>>> Patch v2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
tcp: update also tcp_output with regard to RFC 5681

Thanks to Ilpo Jarvinen, this updates also the initial window
setting for tcp_output with regard to RFC 5681.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/ipv4/tcp_output.c |   12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -224,16 +224,10 @@ void tcp_select_initial_window(int __spa
 		}
 	}
 
-	/* Set initial window to value enough for senders,
-	 * following RFC2414. Senders, not following this RFC,
-	 * will be satisfied with 2.
-	 */
+	/* Set initial window to value enough for senders, following RFC5681. */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = 4;
-		if (mss > 1460 * 3)
-			init_cwnd = 2;
-		else if (mss > 1460)
-			init_cwnd = 3;
+		int init_cwnd = rfc3390_bytes_to_packets(mss);
+
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: net-next-2.6 [PATCH 1/1] tcp: add missing initial window (RFC 3390) for tcp_output
  2010-09-01 10:28                 ` Gerrit Renker
@ 2010-09-02  1:18                   ` David Miller
  0 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2010-09-02  1:18 UTC (permalink / raw)
  To: gerrit; +Cc: ilpo.jarvinen, netdev

From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 1 Sep 2010 12:28:35 +0200

> Addressing comments from Alexander, removing comment.
> 
>>>>>>>>>>>>>>>>>>>>>>>> Patch v2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
> tcp: update also tcp_output with regard to RFC 5681
> 
> Thanks to Ilpo Jarvinen, this updates also the initial window
> setting for tcp_output with regard to RFC 5681.
> 
> Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>

Applied to net-next-2.6, thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2010-09-02  1:18 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <ccid2_and_tcp_code_sharing>
2010-08-30  5:23 ` net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing between TCP and DCCP Gerrit Renker
2010-08-30  5:23   ` Gerrit Renker
2010-08-30  5:23   ` [PATCH 1/5] dccp ccid-2: Use u32 timestamps uniformly Gerrit Renker
2010-08-30  5:23     ` Gerrit Renker
2010-08-30  5:23     ` [PATCH 2/5] dccp ccid-2: Remove wrappers around sk_{reset,stop}_timer() Gerrit Renker
2010-08-30  5:23       ` Gerrit Renker
2010-08-30  5:23       ` [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion Gerrit Renker
2010-08-30  5:23         ` Gerrit Renker
2010-08-30  5:23         ` [PATCH 4/5] dccp ccid-2: Share TCP's minimum RTO code Gerrit Renker
2010-08-30  5:23           ` Gerrit Renker
2010-08-30  5:23           ` [PATCH 5/5] dccp ccid-3: use per-route RTO or TCP RTO as fallback Gerrit Renker
2010-08-30  5:23             ` Gerrit Renker
2010-08-30 12:59         ` [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion Ilpo Järvinen
2010-08-30 12:59           ` [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 Ilpo Järvinen
2010-09-01  5:23           ` [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion Gerrit Renker
2010-09-01  5:23             ` [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 Gerrit Renker
2010-09-01  5:34             ` net-next-2.6 [PATCH 1/1] tcp: add missing initial window (RFC 3390) for tcp_output Gerrit Renker
2010-09-01  7:06               ` Alexander Zimmermann
2010-09-01 10:16                 ` Gerrit Renker
2010-09-01 10:28                 ` Gerrit Renker
2010-09-02  1:18                   ` David Miller
2010-09-01  7:05             ` [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 conversion Alexander Zimmermann
2010-09-01  7:05               ` [PATCH 3/5] tcp/dccp: Consolidate common code for RFC 3390 Alexander Zimmermann
2010-08-30 20:46   ` net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing between TCP and DCCP David Miller
2010-08-30 20:46     ` net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing David Miller
2010-08-31 10:38     ` net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; code sharing between TCP and DCCP gerrit
2010-08-31 10:38       ` net-2.6 [PATCH 0/5] dccp: ccid-2 clean-up; gerrit

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.