All of lore.kernel.org
 help / color / mirror / Atom feed
* [net-next PATCH v3 3/3] net: TCP thin dupack
@ 2010-02-11 12:07 Andreas Petlund
  2010-02-12 11:19 ` William Allen Simpson
  2010-02-13  2:13 ` Eric W. Biederman
  0 siblings, 2 replies; 6+ messages in thread
From: Andreas Petlund @ 2010-02-11 12:07 UTC (permalink / raw)
  To: netdev
  Cc: Ilpo Järvinen, Eric Dumazet, Arnd Hannemann, LKML,
	shemminger, David Miller, william.allen.simpson, damian

Major changes:
      -Possible to disable mechanisms by socket option
      -Socket option value boundary check


Signed-off-by: Andreas Petlund <apetlund@simula.no>
---
 include/linux/sysctl.h     |    1 +
 include/linux/tcp.h        |    4 +++-
 include/net/tcp.h          |    1 +
 net/ipv4/sysctl_net_ipv4.c |    7 +++++++
 net/ipv4/tcp.c             |    7 +++++++
 net/ipv4/tcp_input.c       |   11 +++++++++++
 6 files changed, 30 insertions(+), 1 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index d840d75..ded3f20 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -426,6 +426,7 @@ enum
 	NET_TCP_MAX_SSTHRESH=124,
 	NET_TCP_FRTO_RESPONSE=125,
 	NET_TCP_FORCE_THIN_LINEAR_TIMEOUTS=126,
+	NET_TCP_FORCE_THIN_LINEAR_DUPACK=127,
 };
 
 enum {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 67da706..c30ed17 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -104,6 +104,7 @@ enum {
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
 #define TCP_COOKIE_TRANSACTIONS	15	/* TCP Cookie Transactions */
 #define TCP_THIN_LT             16      /* Use linear timeouts for thin streams*/
+#define TCP_THIN_DUPACK         17      /* Fast retrans. after 1 dupack */
 
 /* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
@@ -343,7 +344,8 @@ struct tcp_sock {
 	u8	frto_counter;	/* Number of new acks after RTO */
 	u8	nonagle;	/* Disable Nagle algorithm?             */
 	u8      thin_lt     : 1,/* Use linear timeouts for thin streams */
-		thin_undef  : 7;
+		thin_dupack : 1,/* Fast retransmit on first dupack      */
+		thin_undef  : 6;
 
 /* RTT measurement */
 	u32	srtt;		/* smoothed round trip time << 3	*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bc5856a..af1253c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -245,6 +245,7 @@ extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
 extern int sysctl_tcp_cookie_size;
 extern int sysctl_tcp_force_thin_linear_timeouts;
+extern int sysctl_tcp_force_thin_dupack;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index cb2ed35..b097a58 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -582,6 +582,13 @@ static struct ctl_table ipv4_table[] = {
 		.mode           = 0644,
 		.proc_handler   = proc_dointvec
 	},
+        {
+		.procname       = "tcp_force_thin_dupack",
+		.data           = &sysctl_tcp_force_thin_dupack,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec
+	},
 	{
 		.procname	= "udp_mem",
 		.data		= &sysctl_udp_mem,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ce9aeb0..6d7cb9c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2236,6 +2236,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->thin_lt = val;
 		break;
 
+	case TCP_THIN_DUPACK:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->thin_dupack = val;
+		break;
+
 	case TCP_CORK:
 		/* When set indicates to always queue non-full frames.
 		 * Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 28e0296..c5a73ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_nometrics_save __read_mostly;
 
+int sysctl_tcp_force_thin_dupack __read_mostly;
+
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_abc __read_mostly;
 
@@ -2447,6 +2449,15 @@ static int tcp_time_to_recover(struct sock *sk)
 		return 1;
 	}
 
+	/* If a thin stream is detected, retransmit after first
+	 * received dupack. Employ only if SACK is supported in order
+	 * to avoid possible corner-case series of spurious retransmissions
+	 * Use only if there are no unsent data. */
+	if ((tp->thin_dupack || sysctl_tcp_force_thin_dupack) &&
+	    tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
+	    tcp_is_sack(tp) && sk->sk_send_head == NULL)
+		return 1;
+
 	return 0;
 }
 
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [net-next PATCH v3 3/3] net: TCP thin dupack
  2010-02-11 12:07 [net-next PATCH v3 3/3] net: TCP thin dupack Andreas Petlund
@ 2010-02-12 11:19 ` William Allen Simpson
  2010-02-12 11:43   ` Ilpo Järvinen
  2010-02-13 15:50   ` Andreas Petlund
  2010-02-13  2:13 ` Eric W. Biederman
  1 sibling, 2 replies; 6+ messages in thread
From: William Allen Simpson @ 2010-02-12 11:19 UTC (permalink / raw)
  To: Andreas Petlund
  Cc: netdev, Ilpo Järvinen, Eric Dumazet, Arnd Hannemann, LKML,
	shemminger, David Miller, damian

Last year, I'm pretty sure I was on record as thinking this is *not* a
good idea.  But at least it now requires a sysctl to turn on, and
should default to off.

Also that naming was a bit dicey.  Now the names are more descriptive,
but the "force" is a bit overkill.

How about:
   NET_TCP_FORCE_THIN_LINEAR_DUPACK -> NET_TCP_THIN_LINEAR_DUPACK
   tcp_force_thin_dupack            -> tcp_thin_linear_dupack
   sysctl_tcp_force_thin_dupack     -> sysctl_tcp_thin_linear_dupack


Andreas Petlund wrote:
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 28e0296..c5a73ab 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
>  int sysctl_tcp_frto_response __read_mostly;
>  int sysctl_tcp_nometrics_save __read_mostly;
>  
> +int sysctl_tcp_force_thin_dupack __read_mostly;
> +
>  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;

Where is the sysctl initialized?


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [net-next PATCH v3 3/3] net: TCP thin dupack
  2010-02-12 11:19 ` William Allen Simpson
@ 2010-02-12 11:43   ` Ilpo Järvinen
  2010-02-13 15:50   ` Andreas Petlund
  1 sibling, 0 replies; 6+ messages in thread
From: Ilpo Järvinen @ 2010-02-12 11:43 UTC (permalink / raw)
  To: William Allen Simpson
  Cc: Andreas Petlund, netdev, Eric Dumazet, Arnd Hannemann, LKML,
	shemminger, David Miller, damian

On Fri, 12 Feb 2010, William Allen Simpson wrote:

> Andreas Petlund wrote:
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 28e0296..c5a73ab 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
> >  int sysctl_tcp_frto_response __read_mostly;
> >  int sysctl_tcp_nometrics_save __read_mostly;
> >  +int sysctl_tcp_force_thin_dupack __read_mostly;
> > +
> >  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
> 
> Where is the sysctl initialized?

...That's offloaded to the compiler, like is done with some of the above 
ones too. There's nothing to worry in that.

-- 
 i.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [net-next PATCH v3 3/3] net: TCP thin dupack
  2010-02-11 12:07 [net-next PATCH v3 3/3] net: TCP thin dupack Andreas Petlund
  2010-02-12 11:19 ` William Allen Simpson
@ 2010-02-13  2:13 ` Eric W. Biederman
  2010-02-13 15:50   ` Andreas Petlund
  1 sibling, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2010-02-13  2:13 UTC (permalink / raw)
  To: Andreas Petlund
  Cc: netdev, Ilpo Järvinen, Eric Dumazet, Arnd Hannemann, LKML,
	shemminger, David Miller, william.allen.simpson, damian

Andreas Petlund <apetlund@simula.no> writes:

> Major changes:
>       -Possible to disable mechanisms by socket option
>       -Socket option value boundary check
>
>
> Signed-off-by: Andreas Petlund <apetlund@simula.no>
> ---
>  include/linux/sysctl.h     |    1 +
>  include/linux/tcp.h        |    4 +++-
>  include/net/tcp.h          |    1 +
>  net/ipv4/sysctl_net_ipv4.c |    7 +++++++
>  net/ipv4/tcp.c             |    7 +++++++
>  net/ipv4/tcp_input.c       |   11 +++++++++++
>  6 files changed, 30 insertions(+), 1 deletions(-)
>
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index d840d75..ded3f20 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -426,6 +426,7 @@ enum
>  	NET_TCP_MAX_SSTHRESH=124,
>  	NET_TCP_FRTO_RESPONSE=125,
>  	NET_TCP_FORCE_THIN_LINEAR_TIMEOUTS=126,
> +	NET_TCP_FORCE_THIN_LINEAR_DUPACK=127,

There is no need to allocate a binary sysctl here.

Eric

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [net-next PATCH v3 3/3] net: TCP thin dupack
  2010-02-12 11:19 ` William Allen Simpson
  2010-02-12 11:43   ` Ilpo Järvinen
@ 2010-02-13 15:50   ` Andreas Petlund
  1 sibling, 0 replies; 6+ messages in thread
From: Andreas Petlund @ 2010-02-13 15:50 UTC (permalink / raw)
  To: William Allen Simpson
  Cc: netdev, Ilpo Järvinen, Eric Dumazet, Arnd Hannemann, LKML,
	shemminger, David Miller, damian

On 12. feb. 2010 12:19, William Allen Simpson wrote:
> Last year, I'm pretty sure I was on record as thinking this is *not* a
> good idea.  But at least it now requires a sysctl to turn on, and
> should default to off.
> 
> Also that naming was a bit dicey.  Now the names are more descriptive,
> but the "force" is a bit overkill.
> 
> How about:
>   NET_TCP_FORCE_THIN_LINEAR_DUPACK -> NET_TCP_THIN_LINEAR_DUPACK
>   tcp_force_thin_dupack            -> tcp_thin_linear_dupack
>   sysctl_tcp_force_thin_dupack     -> sysctl_tcp_thin_linear_dupack

You uncovered a copy/paste/edit-typo there. The term "linear" had snuck
in even though it does not make sense for this patch. I think that
NET_TCP_THIN_DUPACK, tcp_thin_dupack and sysctl_tcp_thin_dupack will
be better.

Best regards,
Andreas

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [net-next PATCH v3 3/3] net: TCP thin dupack
  2010-02-13  2:13 ` Eric W. Biederman
@ 2010-02-13 15:50   ` Andreas Petlund
  0 siblings, 0 replies; 6+ messages in thread
From: Andreas Petlund @ 2010-02-13 15:50 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: netdev, Ilpo Järvinen, Eric Dumazet, Arnd Hannemann, LKML,
	shemminger, David Miller, william.allen.simpson, damian

On 13. feb. 2010 03:13, Eric W. Biederman wrote:
> Andreas Petlund <apetlund@simula.no> writes:
> 
>> Major changes:
>>       -Possible to disable mechanisms by socket option
>>       -Socket option value boundary check
>>
>>
>> Signed-off-by: Andreas Petlund <apetlund@simula.no>
>> ---
>>  include/linux/sysctl.h     |    1 +
>>  include/linux/tcp.h        |    4 +++-
>>  include/net/tcp.h          |    1 +
>>  net/ipv4/sysctl_net_ipv4.c |    7 +++++++
>>  net/ipv4/tcp.c             |    7 +++++++
>>  net/ipv4/tcp_input.c       |   11 +++++++++++
>>  6 files changed, 30 insertions(+), 1 deletions(-)
>>
>> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
>> index d840d75..ded3f20 100644
>> --- a/include/linux/sysctl.h
>> +++ b/include/linux/sysctl.h
>> @@ -426,6 +426,7 @@ enum
>>  	NET_TCP_MAX_SSTHRESH=124,
>>  	NET_TCP_FRTO_RESPONSE=125,
>>  	NET_TCP_FORCE_THIN_LINEAR_TIMEOUTS=126,
>> +	NET_TCP_FORCE_THIN_LINEAR_DUPACK=127,
> 
> There is no need to allocate a binary sysctl here.
> 
> Eric

Thanks. I'll address this in the next iteration.

Best regrads,
Andreas

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-02-13 15:58 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-11 12:07 [net-next PATCH v3 3/3] net: TCP thin dupack Andreas Petlund
2010-02-12 11:19 ` William Allen Simpson
2010-02-12 11:43   ` Ilpo Järvinen
2010-02-13 15:50   ` Andreas Petlund
2010-02-13  2:13 ` Eric W. Biederman
2010-02-13 15:50   ` Andreas Petlund

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.