All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp
@ 2010-06-21 21:48 Florian Westphal
  2010-06-21 21:48 ` [PATCH 2/2] syncookies: add support for ECN Florian Westphal
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Florian Westphal @ 2010-06-21 21:48 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

As pointed out by Fernando Gont there is no need to encode rcv_wscale
into the cookie.

We did not use the restored rcv_wscale anyway; it is recomputed
via tcp_select_initial_window().

Thus we can save 4 bits in the ts option space by removing rcv_wscale.
In case window scaling was not supported, we set the (invalid) wscale
value 0xf.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/ipv4/syncookies.c |   35 ++++++++++++++---------------------
 net/ipv6/syncookies.c |    1 -
 2 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 51b5662..8896329 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -18,8 +18,8 @@
 #include <net/tcp.h>
 #include <net/route.h>
 
-/* Timestamps: lowest 9 bits store TCP options */
-#define TSBITS 9
+/* Timestamps: lowest bits store TCP options */
+#define TSBITS 5
 #define TSMASK (((__u32)1 << TSBITS) - 1)
 
 extern int sysctl_tcp_syncookies;
@@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
 
 /*
  * when syncookies are in effect and tcp timestamps are enabled we encode
- * tcp options in the lowest 9 bits of the timestamp value that will be
+ * tcp options in the lower bits of the timestamp value that will be
  * sent in the syn-ack.
  * Since subsequent timestamps use the normal tcp_time_stamp value, we
  * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
@@ -70,11 +70,9 @@ __u32 cookie_init_timestamp(struct request_sock *req)
 	u32 options = 0;
 
 	ireq = inet_rsk(req);
-	if (ireq->wscale_ok) {
-		options = ireq->snd_wscale;
-		options |= ireq->rcv_wscale << 4;
-	}
-	options |= ireq->sack_ok << 8;
+
+	options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
+	options |= ireq->sack_ok << 4;
 
 	ts = ts_now & ~TSMASK;
 	ts |= options;
@@ -227,15 +225,14 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * additional tcp options in the timestamp.
  * This extracts these options from the timestamp echo.
  *
- * The lowest 4 bits are for snd_wscale
- * The next 4 lsb are for rcv_wscale
+ * The lowest 4 bits store snd_wscale.
  * The next lsb is for sack_ok
  *
  * return false if we decode an option that should not be.
  */
 bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 {
-	/* echoed timestamp, 9 lowest bits contain options */
+	/* echoed timestamp, lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
 
 	if (!tcp_opt->saw_tstamp)  {
@@ -246,20 +243,17 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 	if (!sysctl_tcp_timestamps)
 		return false;
 
-	tcp_opt->snd_wscale = options & 0xf;
-	options >>= 4;
-	tcp_opt->rcv_wscale = options & 0xf;
-
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
 
 	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
 		return false;
 
-	if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) {
-		tcp_opt->wscale_ok = 1;
-		return sysctl_tcp_window_scaling != 0;
-	}
-	return true;
+	if ((options & 0xf) == 0xf)
+		return true; /* no window scaling */
+
+	tcp_opt->wscale_ok = 1;
+	tcp_opt->snd_wscale = options & 0xf;
+	return sysctl_tcp_window_scaling != 0;
 }
 EXPORT_SYMBOL(cookie_check_timestamp);
 
@@ -313,7 +307,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->ecn_ok		= 0;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
-	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c7ee574..84d818c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -217,7 +217,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->retrans = 0;
 	ireq->ecn_ok		= 0;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
-	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] syncookies: add support for ECN
  2010-06-21 21:48 [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp Florian Westphal
@ 2010-06-21 21:48 ` Florian Westphal
  2010-06-27  5:00   ` David Miller
  2010-06-23 20:28 ` [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp Hagen Paul Pfeifer
  2010-06-27  5:00 ` David Miller
  2 siblings, 1 reply; 8+ messages in thread
From: Florian Westphal @ 2010-06-21 21:48 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

Allows use of ECN when syncookies are in effect by encoding ecn_ok
into the syn-ack tcp timestamp.

While at it, remove a uneeded #ifdef CONFIG_SYN_COOKIES.
With CONFIG_SYN_COOKIES=nm want_cookie is ifdef'd to 0 and gcc
removes the "if (0)".

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/tcp.h     |    2 +-
 net/ipv4/syncookies.c |   15 ++++++++++-----
 net/ipv4/tcp_ipv4.c   |    6 ++----
 net/ipv6/syncookies.c |    5 +++--
 net/ipv6/tcp_ipv6.c   |    2 +-
 5 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 18c246c..c2f96c2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -464,7 +464,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 				     __u16 *mss);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern bool cookie_check_timestamp(struct tcp_options_received *tcp_opt);
+extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
 
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8896329..650cace 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,7 +19,7 @@
 #include <net/route.h>
 
 /* Timestamps: lowest bits store TCP options */
-#define TSBITS 5
+#define TSBITS 6
 #define TSMASK (((__u32)1 << TSBITS) - 1)
 
 extern int sysctl_tcp_syncookies;
@@ -73,6 +73,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
 
 	options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
 	options |= ireq->sack_ok << 4;
+	options |= ireq->ecn_ok << 5;
 
 	ts = ts_now & ~TSMASK;
 	ts |= options;
@@ -226,11 +227,11 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * This extracts these options from the timestamp echo.
  *
  * The lowest 4 bits store snd_wscale.
- * The next lsb is for sack_ok
+ * next 2 bits indicate SACK and ECN support.
  *
  * return false if we decode an option that should not be.
  */
-bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
 {
 	/* echoed timestamp, lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
@@ -244,6 +245,9 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 		return false;
 
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
+	*ecn_ok = (options >> 5) & 1;
+	if (*ecn_ok && !sysctl_tcp_ecn)
+		return false;
 
 	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
 		return false;
@@ -272,6 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
+	bool ecn_ok;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -288,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (!cookie_check_timestamp(&tcp_opt))
+	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
 		goto out;
 
 	ret = NULL;
@@ -305,7 +310,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
-	ireq->ecn_ok		= 0;
+	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e41e6f..8fa32f5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1328,14 +1328,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
-#ifdef CONFIG_SYN_COOKIES
-		req->cookie_ts = tmp_opt.tstamp_ok;
-#endif
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
 		struct inet_peer *peer = NULL;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 84d818c..09fd34f 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -164,6 +164,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	int mss;
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
+	bool ecn_ok;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -180,7 +181,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (!cookie_check_timestamp(&tcp_opt))
+	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
 		goto out;
 
 	ret = NULL;
@@ -215,7 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->expires = 0UL;
 	req->retrans = 0;
-	ireq->ecn_ok		= 0;
+	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f875345..5ebc27e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1269,7 +1269,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (!isn) {
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp
  2010-06-21 21:48 [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp Florian Westphal
  2010-06-21 21:48 ` [PATCH 2/2] syncookies: add support for ECN Florian Westphal
@ 2010-06-23 20:28 ` Hagen Paul Pfeifer
  2010-06-24  7:53   ` Florian Westphal
  2010-06-27  5:00 ` David Miller
  2 siblings, 1 reply; 8+ messages in thread
From: Hagen Paul Pfeifer @ 2010-06-23 20:28 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netdev, Ilpo Järvinen

* Florian Westphal | 2010-06-21 23:48:44 [+0200]:

>As pointed out by Fernando Gont there is no need to encode rcv_wscale
>into the cookie.
>
>We did not use the restored rcv_wscale anyway; it is recomputed
>via tcp_select_initial_window().

I speculate that this behavior was and is not correct. I suppose that their is
a race between the SYN/ACK where we initial force a particular window scale
and the next time where we recalculate the window via tcp_select_initial_window().

If the user change net.core.rmem_max or net.ipv4.tcp_rmem in between this
time, the recalculated window scale (rcv_wscale) can be smaller. But the
receiver still operates with the initial window scale and can overshot the
granted window - and bang.

There are several solutions: encode rcv_wscale into the syn cookie and don't
recalculate or disable window scaling and don't transmit any scaling option
when SYN cookies are active.

HGN

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp
  2010-06-23 20:28 ` [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp Hagen Paul Pfeifer
@ 2010-06-24  7:53   ` Florian Westphal
  2010-06-24 22:14     ` Hagen Paul Pfeifer
  0 siblings, 1 reply; 8+ messages in thread
From: Florian Westphal @ 2010-06-24  7:53 UTC (permalink / raw)
  To: Hagen Paul Pfeifer; +Cc: Florian Westphal, netdev, Ilpo Järvinen

Hagen Paul Pfeifer <hagen@jauu.net> wrote:
> * Florian Westphal | 2010-06-21 23:48:44 [+0200]:
> 
> >As pointed out by Fernando Gont there is no need to encode rcv_wscale
> >into the cookie.
> >
> >We did not use the restored rcv_wscale anyway; it is recomputed
> >via tcp_select_initial_window().
> 
> I speculate that this behavior was and is not correct. I suppose that their is
> a race between the SYN/ACK where we initial force a particular window scale
> and the next time where we recalculate the window via tcp_select_initial_window().

Yes, but it is not the only one. We also do a route lookup to get the
current window size.

> If the user change net.core.rmem_max or net.ipv4.tcp_rmem in between this
> time, the recalculated window scale (rcv_wscale) can be smaller. But the
> receiver still operates with the initial window scale and can overshot the
> granted window - and bang.

Why "bang"?
Sure, its not nice, but is it such a severe problem that we have to keep
rcv_wscale around in the timestamp?

> There are several solutions: encode rcv_wscale into the syn cookie and don't
> recalculate

I would prefer to avoid having to keep rcv_wscale in the timstamp for the sake of
a sysctl change. At the moment the state eats up 9 bits in the timestamp and I don't
like to grow it (which has to be done iff we want to support more options in the future).

>  or disable window scaling and don't transmit any scaling option
> when SYN cookies are active.

No, I would rather see this patch rejected than that.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp
  2010-06-24  7:53   ` Florian Westphal
@ 2010-06-24 22:14     ` Hagen Paul Pfeifer
  2010-06-27  4:27       ` David Miller
  0 siblings, 1 reply; 8+ messages in thread
From: Hagen Paul Pfeifer @ 2010-06-24 22:14 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netdev, Ilpo Järvinen

* Florian Westphal | 2010-06-24 09:53:04 [+0200]:

>> I speculate that this behavior was and is not correct. I suppose that their is
>> a race between the SYN/ACK where we initial force a particular window scale
>> and the next time where we recalculate the window via tcp_select_initial_window().
>
>Yes, but it is not the only one. We also do a route lookup to get the
>current window size.

Is this critical in any sense? The window size is a pure passive variable, we
do not announce any window information to the peer. I don't get the problem
for the window size.

>> If the user change net.core.rmem_max or net.ipv4.tcp_rmem in between this
>> time, the recalculated window scale (rcv_wscale) can be smaller. But the
>> receiver still operates with the initial window scale and can overshot the
>> granted window - and bang.
>
>Why "bang"?
>Sure, its not nice, but is it such a severe problem that we have to keep
>rcv_wscale around in the timestamp?

IMHO if we want a 100% race free behavior yes.

>>  or disable window scaling and don't transmit any scaling option
>> when SYN cookies are active.
>
>No, I would rather see this patch rejected than that.

Sure? SYN cookies are only active if we suffer under acute memory pressure. We
are likely not in the ability to backup large buffer space - why not limit
the window to 2^16 bytes which is sufficient for 99.9999% of the use case?
I do not identify any _real_ advantages to speed up a connection when the
server is under fire.

Another solution is to accept this patch and assume that this race is
rather artificial nature and does not happened. Sure, the race is very
unlikely[TM].

HGN

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp
  2010-06-24 22:14     ` Hagen Paul Pfeifer
@ 2010-06-27  4:27       ` David Miller
  0 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2010-06-27  4:27 UTC (permalink / raw)
  To: hagen; +Cc: fw, netdev, ilpo.jarvinen

From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Fri, 25 Jun 2010 00:14:16 +0200

> why not limit the window to 2^16 bytes which is sufficient for
> 99.9999% of the use case?

This may have been true 8 years ago, but it is not any longer.

You will underutilize your link with anything smaller than
~512k on the modern internet.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp
  2010-06-21 21:48 [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp Florian Westphal
  2010-06-21 21:48 ` [PATCH 2/2] syncookies: add support for ECN Florian Westphal
  2010-06-23 20:28 ` [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp Hagen Paul Pfeifer
@ 2010-06-27  5:00 ` David Miller
  2 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2010-06-27  5:00 UTC (permalink / raw)
  To: fw; +Cc: netdev

From: Florian Westphal <fw@strlen.de>
Date: Mon, 21 Jun 2010 23:48:44 +0200

> As pointed out by Fernando Gont there is no need to encode rcv_wscale
> into the cookie.
> 
> We did not use the restored rcv_wscale anyway; it is recomputed
> via tcp_select_initial_window().
> 
> Thus we can save 4 bits in the ts option space by removing rcv_wscale.
> In case window scaling was not supported, we set the (invalid) wscale
> value 0xf.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>

Applied.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] syncookies: add support for ECN
  2010-06-21 21:48 ` [PATCH 2/2] syncookies: add support for ECN Florian Westphal
@ 2010-06-27  5:00   ` David Miller
  0 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2010-06-27  5:00 UTC (permalink / raw)
  To: fw; +Cc: netdev

From: Florian Westphal <fw@strlen.de>
Date: Mon, 21 Jun 2010 23:48:45 +0200

> Allows use of ECN when syncookies are in effect by encoding ecn_ok
> into the syn-ack tcp timestamp.
> 
> While at it, remove a uneeded #ifdef CONFIG_SYN_COOKIES.
> With CONFIG_SYN_COOKIES=nm want_cookie is ifdef'd to 0 and gcc
> removes the "if (0)".
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>

Also applied, nice work Florian.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2010-06-27  5:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-21 21:48 [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp Florian Westphal
2010-06-21 21:48 ` [PATCH 2/2] syncookies: add support for ECN Florian Westphal
2010-06-27  5:00   ` David Miller
2010-06-23 20:28 ` [PATCH 1/2] syncookies: do not store rcv_wscale in tcp timestamp Hagen Paul Pfeifer
2010-06-24  7:53   ` Florian Westphal
2010-06-24 22:14     ` Hagen Paul Pfeifer
2010-06-27  4:27       ` David Miller
2010-06-27  5:00 ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.