All of lore.kernel.org
 help / color / mirror / Atom feed
* [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange
@ 2009-11-13  4:03 William Allen Simpson
  2009-11-13  4:07 ` [net-next-2.6 PATCH v6 1/7 RFC] TCPCT part 1a: add request_values parameter for sending SYNACK William Allen Simpson
                   ` (6 more replies)
  0 siblings, 7 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13  4:03 UTC (permalink / raw)
  To: Linux Kernel Network Developers

Although 'make vmlinux' compiles this successfully, it is not tested.
Keep in mind that earlier variants were extensively tested.  But at least
this helps move the review comment process along....

Updated and reorganized to split the code base into several pieces, as
requested by Eric.  No significant changes.

The new TCP_MSS_* symbols are now in an entirely separate patch.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* [net-next-2.6 PATCH v6 1/7 RFC] TCPCT part 1a: add request_values parameter for sending SYNACK
  2009-11-13  4:03 [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange William Allen Simpson
@ 2009-11-13  4:07 ` William Allen Simpson
  2009-11-13  4:54   ` Ilpo Järvinen
  2009-11-13  4:17 ` [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie William Allen Simpson
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13  4:07 UTC (permalink / raw)
  To: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 1006 bytes --]

Add optional function parameters associated with sending SYNACK.
These parameters are not needed after sending SYNACK, and are not
used for retransmission.  Avoids extending struct tcp_request_sock,
and avoids allocating kernel memory.

Also affects DCCP as it uses common struct request_sock_ops,
but this parameter is currently reserved for future use.

Signed-off-by: William.Allen.Simpson@gmail.com
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
  include/net/request_sock.h      |    8 +++++++-
  include/net/tcp.h               |    3 ++-
  net/dccp/ipv4.c                 |    5 +++--
  net/dccp/ipv6.c                 |    5 +++--
  net/dccp/minisocks.c            |    2 +-
  net/ipv4/inet_connection_sock.c |    2 +-
  net/ipv4/tcp_ipv4.c             |   19 +++++++++++--------
  net/ipv4/tcp_minisocks.c        |    2 +-
  net/ipv4/tcp_output.c           |    3 ++-
  net/ipv6/tcp_ipv6.c             |   28 +++++++++++++---------------
  10 files changed, 44 insertions(+), 33 deletions(-)

[-- Attachment #2: TCPCT+1a6.patch --]
[-- Type: text/plain, Size: 8878 bytes --]

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index c719084..c9b50eb 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -27,13 +27,19 @@ struct sk_buff;
 struct dst_entry;
 struct proto;
 
+/* empty to "strongly type" an otherwise void parameter.
+ */
+struct request_values {
+};
+
 struct request_sock_ops {
 	int		family;
 	int		obj_size;
 	struct kmem_cache	*slab;
 	char		*slab_name;
 	int		(*rtx_syn_ack)(struct sock *sk,
-				       struct request_sock *req);
+				       struct request_sock *req,
+				       struct request_values *rvp);
 	void		(*send_ack)(struct sock *sk, struct sk_buff *skb,
 				    struct request_sock *req);
 	void		(*send_reset)(struct sock *sk,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 325bfcf..ec183fd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -440,7 +440,8 @@ extern int			tcp_connect(struct sock *sk);
 
 extern struct sk_buff *		tcp_make_synack(struct sock *sk,
 						struct dst_entry *dst,
-						struct request_sock *req);
+						struct request_sock *req,
+						struct request_values *rvp);
 
 extern int			tcp_disconnect(struct sock *sk, int flags);
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2423a08..efbcfdc 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -477,7 +477,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 	return &rt->u.dst;
 }
 
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+				 struct request_values *rv_unused)
 {
 	int err = -1;
 	struct sk_buff *skb;
@@ -626,7 +627,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	dreq->dreq_iss	   = dccp_v4_init_sequence(skb);
 	dreq->dreq_service = service;
 
-	if (dccp_v4_send_response(sk, req))
+	if (dccp_v4_send_response(sk, req, NULL))
 		goto drop_and_free;
 
 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 50ea91a..6574215 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -241,7 +241,8 @@ out:
 }
 
 
-static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
+static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
+				 struct request_values *rv_unused)
 {
 	struct inet6_request_sock *ireq6 = inet6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -468,7 +469,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	dreq->dreq_iss	   = dccp_v6_init_sequence(skb);
 	dreq->dreq_service = service;
 
-	if (dccp_v6_send_response(sk, req))
+	if (dccp_v6_send_response(sk, req, NULL))
 		goto drop_and_free;
 
 	inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 5ca49ce..af226a0 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -184,7 +184,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 			 * counter (backoff, monitored by dccp_response_timer).
 			 */
 			req->retrans++;
-			req->rsk_ops->rtx_syn_ack(sk, req);
+			req->rsk_ops->rtx_syn_ack(sk, req, NULL);
 		}
 		/* Network Duplicate, discard packet */
 		return NULL;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 26fb50e..ad098d6 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -531,7 +531,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 					       &expire, &resend);
 				if (!expire &&
 				    (!resend ||
-				     !req->rsk_ops->rtx_syn_ack(parent, req) ||
+				     !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
 				     inet_rsk(req)->acked)) {
 					unsigned long timeo;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cf7f208..094231b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -742,8 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  *	This still operates on a request_sock only, not on a big
  *	socket.
  */
-static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
-				struct dst_entry *dst)
+static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
+				struct request_sock *req,
+				struct request_values *rvp)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	int err = -1;
@@ -753,7 +754,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req);
+	skb = tcp_make_synack(sk, dst, req, rvp);
 
 	if (skb) {
 		struct tcphdr *th = tcp_hdr(skb);
@@ -774,9 +775,10 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 	return err;
 }
 
-static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
+static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
+			      struct request_values *rvp)
 {
-	return __tcp_v4_send_synack(sk, req, NULL);
+	return __tcp_v4_send_synack(sk, NULL, req, rvp);
 }
 
 /*
@@ -1211,13 +1213,13 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct inet_request_sock *ireq;
 	struct tcp_options_received tmp_opt;
 	struct request_sock *req;
+	struct inet_request_sock *ireq;
+	struct dst_entry *dst = NULL;
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-	struct dst_entry *dst = NULL;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1337,7 +1339,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_rsk(req)->snt_isn = isn;
 
-	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
+	if (__tcp_v4_send_synack(sk, dst, req, NULL)
+	 || want_cookie)
 		goto drop_and_free;
 
 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4be2228..7a42990 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -537,7 +537,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		 * Enforce "SYN-ACK" according to figure 8, figure 6
 		 * of RFC793, fixed by RFC1122.
 		 */
-		req->rsk_ops->rtx_syn_ack(sk, req);
+		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
 		return NULL;
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 616c686..1151cb8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2224,7 +2224,8 @@ int tcp_send_synack(struct sock *sk)
 
 /* Prepare a SYN-ACK. */
 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
-				struct request_sock *req)
+				struct request_sock *req,
+				struct request_values *rvp)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index de70909..3e327bc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -461,7 +461,8 @@ out:
 }
 
 
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
+static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+			      struct request_values *rvp)
 {
 	struct inet6_request_sock *treq = inet6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -499,7 +500,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 	if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req);
+	skb = tcp_make_synack(sk, dst, req, rvp);
 	if (skb) {
 		struct tcphdr *th = tcp_hdr(skb);
 
@@ -1161,13 +1162,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_options_received tmp_opt;
+	struct request_sock *req;
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct tcp_options_received tmp_opt;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock *req = NULL;
-	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = __sk_dst_get(sk);
+	__u32 isn = TCP_SKB_CB(skb)->when;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1239,23 +1240,20 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v6_init_sequence(skb);
 	}
-
 	tcp_rsk(req)->snt_isn = isn;
 
 	security_inet_conn_request(sk, skb, req);
 
-	if (tcp_v6_send_synack(sk, req))
-		goto drop;
+	if (tcp_v6_send_synack(sk, req, NULL)
+	 || want_cookie)
+		goto drop_and_free;
 
-	if (!want_cookie) {
-		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		return 0;
-	}
+	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	return 0;
 
+drop_and_free:
+	reqsk_free(req);
 drop:
-	if (req)
-		reqsk_free(req);
-
 	return 0; /* don't send reset */
 }
 
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-13  4:03 [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange William Allen Simpson
  2009-11-13  4:07 ` [net-next-2.6 PATCH v6 1/7 RFC] TCPCT part 1a: add request_values parameter for sending SYNACK William Allen Simpson
@ 2009-11-13  4:17 ` William Allen Simpson
  2009-11-13  6:21   ` Eric Dumazet
  2009-11-13  6:26   ` Joe Perches
  2009-11-13  4:31 ` [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13  4:17 UTC (permalink / raw)
  To: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 807 bytes --]

Define (missing) hash message size for SHA1.

Define hashing size constants specific to TCP cookies, and add new function.

Maintain global secret values for tcp_cookie_generator().

This is a significantly revised implementation of earlier (15-year-old)
Photuris [RFC-2522] code for the KA9Q cooperative multitasking platform.

Linux RCU technique appears to be well-suited to this application, though
neither of the circular queue items are freed.

These functions will also be used in subsequent patches that implement
additional features.

Signed-off-by: William.Allen.Simpson@gmail.com
---
  include/linux/cryptohash.h |    1 +
  include/net/tcp.h          |    8 +++
  net/ipv4/tcp.c             |  146 ++++++++++++++++++++++++++++++++++++++++++++
  3 files changed, 155 insertions(+), 0 deletions(-)

[-- Attachment #2: TCPCT+1b6.patch --]
[-- Type: text/plain, Size: 6995 bytes --]

diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
index c118b2a..ec78a4b 100644
--- a/include/linux/cryptohash.h
+++ b/include/linux/cryptohash.h
@@ -2,6 +2,7 @@
 #define __CRYPTOHASH_H
 
 #define SHA_DIGEST_WORDS 5
+#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
 #define SHA_WORKSPACE_WORDS 80
 
 void sha_init(__u32 *buf);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ec183fd..4a99a8e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1478,6 +1478,14 @@ struct tcp_request_sock_ops {
 #endif
 };
 
+/* Using SHA1 for now, define some constants.
+ */
+#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS)
+#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4)
+#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)
+
+extern int tcp_cookie_generator(u32 *bakery);
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e0cfa63..3ae01bf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -264,6 +264,7 @@
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/crypto.h>
+#include <linux/time.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -2842,6 +2843,141 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
+/**
+ * Each Responder maintains up to two secret values concurrently for
+ * efficient secret rollover.  Each secret value has 4 states:
+ *
+ * Generating.  (tcp_secret_generating != tcp_secret_primary)
+ *    Generates new Responder-Cookies, but not yet used for primary
+ *    verification.  This is a short-term state, typically lasting only
+ *    one round trip time (RTT).
+ *
+ * Primary.  (tcp_secret_generating == tcp_secret_primary)
+ *    Used both for generation and primary verification.
+ *
+ * Retiring.  (tcp_secret_retiring != tcp_secret_secondary)
+ *    Used for verification, until the first failure that can be
+ *    verified by the newer Generating secret.  At that time, this
+ *    cookie's state is changed to Secondary, and the Generating
+ *    cookie's state is changed to Primary.  This is a short-term state,
+ *    typically lasting only one round trip time (RTT).
+ *
+ * Secondary.  (tcp_secret_retiring == tcp_secret_secondary)
+ *    Used for secondary verification, after primary verification
+ *    failures.  This state lasts no more than twice the Maximum Segment
+ *    Lifetime (2MSL).  Then, the secret is discarded.
+ */
+struct tcp_cookie_secret {
+	/* The secret is divided into two parts.  The digest part is the
+	 * equivalent of previously hashing a secret and saving the state,
+	 * and serves as an initialization vector (IV).  The message part
+	 * serves as the trailing secret.
+	 */
+	u32				secrets[COOKIE_WORKSPACE_WORDS];
+	unsigned long			expires;
+};
+
+#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
+#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
+#define TCP_SECRET_LIFE (HZ * 600)
+
+static struct tcp_cookie_secret tcp_secret_one;
+static struct tcp_cookie_secret tcp_secret_two;
+
+/* Essentially a circular list, without dynamic allocation. */
+static struct tcp_cookie_secret *tcp_secret_generating;
+static struct tcp_cookie_secret *tcp_secret_primary;
+static struct tcp_cookie_secret *tcp_secret_retiring;
+static struct tcp_cookie_secret *tcp_secret_secondary;
+
+static DEFINE_SPINLOCK(tcp_secret_locker);
+
+/* Select a random word in the cookie workspace.
+ */
+static inline u32 tcp_cookie_work(const u32 *ws, const int n)
+{
+	return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
+}
+
+/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
+ * Called in softirq context.
+ * Returns: 0 for success.
+ */
+int tcp_cookie_generator(u32 *bakery)
+{
+	unsigned long jiffy = jiffies;
+
+	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
+		spin_lock_bh(&tcp_secret_locker);
+		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
+			/* refreshed by another */
+			spin_unlock_bh(&tcp_secret_locker);
+			memcpy(bakery,
+			       &tcp_secret_generating->secrets[0],
+			       sizeof(tcp_secret_generating->secrets));
+		} else {
+			u32 secrets[COOKIE_WORKSPACE_WORDS];
+
+			/* still needs refreshing */
+			get_random_bytes(secrets, sizeof(secrets));
+
+			/* The first time, paranoia assumes that the
+			 * randomization function isn't as strong.  But,
+			 * this secret initialization is delayed until
+			 * the last possible moment (packet arrival).
+			 * Although that time is observable, it is
+			 * unpredictably variable.  Mash in the most
+			 * volatile clock bits available, and expire the
+			 * secret extra quickly.
+			 */
+			if (unlikely(tcp_secret_primary->expires ==
+				     tcp_secret_secondary->expires)) {
+				struct timespec tv;
+
+				getnstimeofday(&tv);
+				secrets[COOKIE_DIGEST_WORDS+0] ^=
+					(u32)tv.tv_nsec;
+
+				tcp_secret_secondary->expires = jiffy
+					+ TCP_SECRET_1MSL
+					+ (0x0f & tcp_cookie_work(secrets, 0));
+			} else {
+				tcp_secret_secondary->expires = jiffy
+					+ TCP_SECRET_LIFE
+					+ (0xff & tcp_cookie_work(secrets, 1));
+				tcp_secret_primary->expires = jiffy
+					+ TCP_SECRET_2MSL
+					+ (0x1f & tcp_cookie_work(secrets, 2));
+			}
+			memcpy(&tcp_secret_secondary->secrets[0],
+			       &secrets[0],
+			       sizeof(secrets));
+
+			rcu_assign_pointer(tcp_secret_generating,
+					   tcp_secret_secondary);
+			rcu_assign_pointer(tcp_secret_retiring,
+					   tcp_secret_primary);
+			spin_unlock_bh(&tcp_secret_locker);
+			/* Neither call_rcu() or synchronize_rcu() are needed.
+			 * Retiring data is not freed.  It is replaced after
+			 * further (locked) pointer updates, and a quiet time
+			 * (minimum 1MSL, maximum LIFE - 2MSL).
+			 */
+			memcpy(bakery,
+			       &secrets[0],
+			       sizeof(secrets));
+		}
+	} else {
+		rcu_read_lock_bh();
+		memcpy(bakery,
+		       &rcu_dereference(tcp_secret_generating)->secrets[0],
+		       sizeof(tcp_secret_generating->secrets));
+		rcu_read_unlock_bh();
+	}
+	return 0;
+}
+EXPORT_SYMBOL(tcp_cookie_generator);
+
 void tcp_done(struct sock *sk)
 {
 	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
@@ -2876,6 +3012,7 @@ void __init tcp_init(void)
 	struct sk_buff *skb = NULL;
 	unsigned long nr_pages, limit;
 	int order, i, max_share;
+	unsigned long jiffy = jiffies;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -2969,6 +3106,15 @@ void __init tcp_init(void)
 	       tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
 
 	tcp_register_congestion_control(&tcp_reno);
+
+	memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
+	memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
+	tcp_secret_one.expires = jiffy; /* past due */
+	tcp_secret_two.expires = jiffy; /* past due */
+	tcp_secret_generating = &tcp_secret_one;
+	tcp_secret_primary = &tcp_secret_one;
+	tcp_secret_retiring = &tcp_secret_two;
+	tcp_secret_secondary = &tcp_secret_two;
 }
 
 EXPORT_SYMBOL(tcp_close);
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
  2009-11-13  4:03 [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange William Allen Simpson
  2009-11-13  4:07 ` [net-next-2.6 PATCH v6 1/7 RFC] TCPCT part 1a: add request_values parameter for sending SYNACK William Allen Simpson
  2009-11-13  4:17 ` [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie William Allen Simpson
@ 2009-11-13  4:31 ` William Allen Simpson
  2009-11-13 18:37   ` Joe Perches
  2009-11-13  4:53 ` [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's William Allen Simpson
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13  4:31 UTC (permalink / raw)
  To: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 1190 bytes --]

Define sysctl (tcp_cookie_size) to turn on and off the cookie option
default globally, instead of a compiled configuration option.

Define per socket option (TCP_COOKIE_TRANSACTIONS) for setting constant
data values, retrieving variable cookie values, and other facilities.

Move inline tcp_clear_options() unchanged from net/tcp.h to linux/tcp.h,
near its corresponding struct tcp_options_received (prior to changes).

This is a straightforward re-implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

These functions will also be used in subsequent patches that implement
additional features.

Requires:
   net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED

Signed-off-by: William.Allen.Simpson@gmail.com
---
  Documentation/networking/ip-sysctl.txt |    8 +++++++
  include/linux/tcp.h                    |   33 +++++++++++++++++++++++++++++++-
  include/net/tcp.h                      |    6 +----
  net/ipv4/sysctl_net_ipv4.c             |    8 +++++++
  net/ipv4/tcp_output.c                  |    8 +++++++
  5 files changed, 57 insertions(+), 6 deletions(-)

[-- Attachment #2: TCPCT+1c6.patch --]
[-- Type: text/plain, Size: 5089 bytes --]

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a0e134d..d47c000 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -164,6 +164,14 @@ tcp_congestion_control - STRING
 	additional choices may be available based on kernel configuration.
 	Default is set as part of kernel configuration.
 
+tcp_cookie_size - INTEGER
+	Default size of TCP Cookie Transactions (TCPCT) option, that may be
+	overridden on a per socket basis by the TCPCT socket option.
+	Values greater than the maximum (16) are interpreted as the maximum.
+	Values greater than zero and less than the minimum (8) are interpreted
+	as the minimum.
+	Default: 0 (off).
+
 tcp_dsack - BOOLEAN
 	Allows TCP to send "duplicate" SACKs.
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 32d7d77..eaa3113 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -102,7 +102,9 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
+#define TCP_COOKIE_TRANSACTIONS	15	/* TCP Cookie Transactions */
 
+/* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
 #define TCPI_OPT_WSCALE		4
@@ -174,6 +176,30 @@ struct tcp_md5sig {
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];		/* key (binary) */
 };
 
+/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */
+#define TCP_COOKIE_MIN		 8		/*  64-bits */
+#define TCP_COOKIE_MAX		16		/* 128-bits */
+#define TCP_COOKIE_PAIR_SIZE	(2*TCP_COOKIE_MAX)
+
+/* Flags for both getsockopt and setsockopt */
+#define TCP_COOKIE_IN_ALWAYS	(1 << 0)	/* Discard SYN without cookie */
+#define TCP_COOKIE_OUT_NEVER	(1 << 1)	/* Prohibit outgoing cookies,
+						 * supercedes everything. */
+
+/* Flags for getsockopt */
+#define TCP_S_DATA_IN		(1 << 2)	/* Was data received? */
+#define TCP_S_DATA_OUT		(1 << 3)	/* Was data sent? */
+
+/* TCP_COOKIE_TRANSACTIONS data */
+struct tcp_cookie_transactions {
+	__u16	tcpct_flags;			/* see above */
+	__u8	__tcpct_pad1;			/* zero */
+	__u8	tcpct_cookie_desired;		/* bytes */
+	__u16	tcpct_s_data_desired;		/* bytes of variable data */
+	__u16	tcpct_used;			/* bytes in value */
+	__u8	tcpct_value[TCP_MSS_DEFAULT];
+};
+
 #ifdef __KERNEL__
 
 #include <linux/skbuff.h>
@@ -227,6 +253,11 @@ struct tcp_options_received {
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
+static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
+{
+	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+}
+
 /* This is the max number of SACKS that we'll generate and process. It's safe
  * to increse this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
@@ -435,6 +466,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
 	return (struct tcp_timewait_sock *)sk;
 }
 
-#endif
+#endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_TCP_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4a99a8e..738b65f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -234,6 +234,7 @@ extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_cookie_size;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -340,11 +341,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
 
 extern void tcp_enter_quickack_mode(struct sock *sk);
 
-static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
-{
- 	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
-}
-
 #define	TCP_ECN_OK		1
 #define	TCP_ECN_QUEUE_CWR	2
 #define	TCP_ECN_DEMAND_CWR	4
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2dcf04d..3422c54 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -714,6 +714,14 @@ static struct ctl_table ipv4_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_cookie_size",
+		.data		= &sysctl_tcp_cookie_size,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "udp_mem",
 		.data		= &sysctl_udp_mem,
 		.maxlen		= sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1151cb8..e59fa5a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,14 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+#ifdef CONFIG_SYSCTL
+/* By default, let the user enable it. */
+int sysctl_tcp_cookie_size __read_mostly = 0;
+#else
+int sysctl_tcp_cookie_size __read_mostly = TCP_COOKIE_MAX;
+#endif
+
+
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-13  4:03 [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange William Allen Simpson
                   ` (2 preceding siblings ...)
  2009-11-13  4:31 ` [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
@ 2009-11-13  4:53 ` William Allen Simpson
  2009-11-13  6:32   ` Eric Dumazet
  2009-11-13  5:10 ` [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13  4:53 UTC (permalink / raw)
  To: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 1702 bytes --]

Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.

This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.

    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html

    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail

These functions will also be used in subsequent patches that implement
additional features.

Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS

Signed-off-by: William.Allen.Simpson@gmail.com
---
  include/linux/tcp.h |   29 ++++++++++++++++----
  include/net/tcp.h   |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++
  2 files changed, 95 insertions(+), 6 deletions(-)

[-- Attachment #2: TCPCT+1d6.patch --]
[-- Type: text/plain, Size: 6048 bytes --]

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index eaa3113..6c5ff66 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -247,31 +247,38 @@ struct tcp_options_received {
 		sack_ok : 4,	/* SACK seen on SYN packet		*/
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
-/*	SACKs data	*/
+	u8	cookie_plus:6,	/* bytes in authenticator/cookie option	*/
+		cookie_out_never:1,
+		cookie_in_always:1;
 	u8	num_sacks;	/* Number of SACK blocks		*/
-	u16	user_mss;  	/* mss requested by user in ioctl */
+	u16	user_mss;	/* mss requested by user in ioctl	*/
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
-	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
+	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->cookie_plus = 0;
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
- * to increse this, although since:
+ * to increase this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
  * only four options will fit in a standard TCP header */
 #define TCP_NUM_SACKS 4
 
+struct tcp_cookie_values;
+struct tcp_request_sock_ops;
+
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 #ifdef CONFIG_TCP_MD5SIG
 	/* Only used by TCP MD5 Signature so far. */
 	const struct tcp_request_sock_ops *af_specific;
 #endif
-	u32			 	rcv_isn;
-	u32			 	snt_isn;
+	u32				rcv_isn;
+	u32				snt_isn;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -441,6 +448,12 @@ struct tcp_sock {
 /* TCP MD5 Signature Option information */
 	struct tcp_md5sig_info	*md5sig_info;
 #endif
+
+	/* When the cookie options are generated and exchanged, then this
+	 * object holds a reference to them (cookie_values->kref).  Also
+	 * contains related tcp_cookie_transactions fields.
+	 */
+	struct tcp_cookie_values  	*cookie_values;
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -459,6 +472,10 @@ struct tcp_timewait_sock {
 	u16			  tw_md5_keylen;
 	u8			  tw_md5_key[TCP_MD5SIG_MAXKEYLEN];
 #endif
+	/* Few sockets in timewait have cookies; in that case, then this
+	 * object holds a reference to them (tw_cookie_values->kref).
+	 */
+	struct tcp_cookie_values  *tw_cookie_values;
 };
 
 static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 738b65f..3a4c840 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
+#include <linux/kref.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -164,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */
 
 /*
  *     TCP option lengths
@@ -174,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERM      2
 #define TCPOLEN_TIMESTAMP      10
 #define TCPOLEN_MD5SIG         18
+#define TCPOLEN_COOKIE_BASE    2	/* Cookie-less header extension */
+#define TCPOLEN_COOKIE_PAIR    3	/* Cookie pair header extension */
+#define TCPOLEN_COOKIE_MIN     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
+#define TCPOLEN_COOKIE_MAX     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -1482,6 +1488,72 @@ struct tcp_request_sock_ops {
 
 extern int tcp_cookie_generator(u32 *bakery);
 
+/**
+ * A tcp_sock contains a pointer to the current value, and this is cloned to
+ * the tcp_timewait_sock.
+ *
+ * @cookie_pair:	variable data from the option exchange.
+ *
+ * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
+ *			indicates default (sysctl_tcp_cookie_size).
+ *			After cookie sent, remembers size of cookie.
+ *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
+ *
+ * @s_data_desired:	user specified tcpct_s_data_desired.  When the
+ *			constant payload is specified (@s_data_constant),
+ *			holds its length instead.
+ *			Range 0 to TCP_MSS_DESIRED.
+ *
+ * @s_data_payload:	constant data that is to be included in the
+ *			payload of SYN or SYNACK segments when the
+ *			cookie option is present.
+ */
+struct tcp_cookie_values {
+	struct kref	kref;
+	u8		cookie_pair[TCP_COOKIE_PAIR_SIZE];
+	u8		cookie_pair_size;
+	u8		cookie_desired;
+	u16		s_data_desired:11,
+			s_data_constant:1,
+			s_data_in:1,
+			s_data_out:1,
+			s_data_unused:2;
+	u8		s_data_payload[0];
+};
+
+static inline void tcp_cookie_values_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct tcp_cookie_values, kref));
+}
+
+/* The length of constant payload data.  Note that s_data_desired is
+ * overloaded, depending on s_data_constant: either the length of constant
+ * data (returned here) or the limit on variable data.
+ */
+static inline int tcp_s_data_size(const struct tcp_sock *tp)
+{
+	return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
+		? tp->cookie_values->s_data_desired
+		: 0;
+}
+
+/* As tcp_request_sock has already been extended in other places, the
+ * only remaining method is to pass stack values along as function
+ * parameters.  These parameters are not needed after sending SYNACK.
+ */
+struct tcp_extend_values {
+	struct request_values		rv;
+	u32				cookie_bakery[COOKIE_WORKSPACE_WORDS];
+	u8				cookie_plus:6,
+					cookie_out_never:1,
+					cookie_in_always:1;
+};
+
+static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
+{
+	return (struct tcp_extend_values *)rvp;
+}
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 1/7 RFC] TCPCT part 1a: add request_values parameter for sending SYNACK
  2009-11-13  4:07 ` [net-next-2.6 PATCH v6 1/7 RFC] TCPCT part 1a: add request_values parameter for sending SYNACK William Allen Simpson
@ 2009-11-13  4:54   ` Ilpo Järvinen
  0 siblings, 0 replies; 42+ messages in thread
From: Ilpo Järvinen @ 2009-11-13  4:54 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: Linux Kernel Network Developers

On Thu, 12 Nov 2009, William Allen Simpson wrote:

> Add optional function parameters associated with sending SYNACK.
> These parameters are not needed after sending SYNACK, and are not
> used for retransmission.  Avoids extending struct tcp_request_sock,
> and avoids allocating kernel memory.
> 
> Also affects DCCP as it uses common struct request_sock_ops,
> but this parameter is currently reserved for future use.
> 
> Signed-off-by: William.Allen.Simpson@gmail.com
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  include/net/request_sock.h      |    8 +++++++-
>  include/net/tcp.h               |    3 ++-
>  net/dccp/ipv4.c                 |    5 +++--
>  net/dccp/ipv6.c                 |    5 +++--
>  net/dccp/minisocks.c            |    2 +-
>  net/ipv4/inet_connection_sock.c |    2 +-
>  net/ipv4/tcp_ipv4.c             |   19 +++++++++++--------
>  net/ipv4/tcp_minisocks.c        |    2 +-
>  net/ipv4/tcp_output.c           |    3 ++-
>  net/ipv6/tcp_ipv6.c             |   28 +++++++++++++---------------
>  10 files changed, 44 insertions(+), 33 deletions(-)
> 

> diff --git a/include/net/request_sock.h b/include/net/request_sock.h
> index c719084..c9b50eb 100644
> --- a/include/net/request_sock.h
> +++ b/include/net/request_sock.h
> @@ -27,13 +27,19 @@ struct sk_buff;
>  struct dst_entry;
>  struct proto;
>  
> +/* empty to "strongly type" an otherwise void parameter.
> + */
> +struct request_values {
> +};
> +
>  struct request_sock_ops {
>  	int		family;
>  	int		obj_size;
>  	struct kmem_cache	*slab;
>  	char		*slab_name;
>  	int		(*rtx_syn_ack)(struct sock *sk,
> -				       struct request_sock *req);
> +				       struct request_sock *req,
> +				       struct request_values *rvp);
>  	void		(*send_ack)(struct sock *sk, struct sk_buff *skb,
>  				    struct request_sock *req);
>  	void		(*send_reset)(struct sock *sk,
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 325bfcf..ec183fd 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -440,7 +440,8 @@ extern int			tcp_connect(struct sock *sk);
>  
>  extern struct sk_buff *		tcp_make_synack(struct sock *sk,
>  						struct dst_entry *dst,
> -						struct request_sock *req);
> +						struct request_sock *req,
> +						struct request_values *rvp);
>  
>  extern int			tcp_disconnect(struct sock *sk, int flags);
>  
> diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
> index 2423a08..efbcfdc 100644
> --- a/net/dccp/ipv4.c
> +++ b/net/dccp/ipv4.c
> @@ -477,7 +477,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
>  	return &rt->u.dst;
>  }
>  
> -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
> +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
> +				 struct request_values *rv_unused)
>  {
>  	int err = -1;
>  	struct sk_buff *skb;
> @@ -626,7 +627,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
>  	dreq->dreq_iss	   = dccp_v4_init_sequence(skb);
>  	dreq->dreq_service = service;
>  
> -	if (dccp_v4_send_response(sk, req))
> +	if (dccp_v4_send_response(sk, req, NULL))
>  		goto drop_and_free;
>  
>  	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
> diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
> index 50ea91a..6574215 100644
> --- a/net/dccp/ipv6.c
> +++ b/net/dccp/ipv6.c
> @@ -241,7 +241,8 @@ out:
>  }
>  
>  
> -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
> +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
> +				 struct request_values *rv_unused)
>  {
>  	struct inet6_request_sock *ireq6 = inet6_rsk(req);
>  	struct ipv6_pinfo *np = inet6_sk(sk);
> @@ -468,7 +469,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
>  	dreq->dreq_iss	   = dccp_v6_init_sequence(skb);
>  	dreq->dreq_service = service;
>  
> -	if (dccp_v6_send_response(sk, req))
> +	if (dccp_v6_send_response(sk, req, NULL))
>  		goto drop_and_free;
>  
>  	inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
> diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
> index 5ca49ce..af226a0 100644
> --- a/net/dccp/minisocks.c
> +++ b/net/dccp/minisocks.c
> @@ -184,7 +184,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
>  			 * counter (backoff, monitored by dccp_response_timer).
>  			 */
>  			req->retrans++;
> -			req->rsk_ops->rtx_syn_ack(sk, req);
> +			req->rsk_ops->rtx_syn_ack(sk, req, NULL);
>  		}
>  		/* Network Duplicate, discard packet */
>  		return NULL;
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 26fb50e..ad098d6 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -531,7 +531,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
>  					       &expire, &resend);
>  				if (!expire &&
>  				    (!resend ||
> -				     !req->rsk_ops->rtx_syn_ack(parent, req) ||
> +				     !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
>  				     inet_rsk(req)->acked)) {
>  					unsigned long timeo;
>  
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index cf7f208..094231b 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -742,8 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
>   *	This still operates on a request_sock only, not on a big
>   *	socket.
>   */
> -static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
> -				struct dst_entry *dst)
> +static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
> +				struct request_sock *req,
> +				struct request_values *rvp)
>  {
>  	const struct inet_request_sock *ireq = inet_rsk(req);
>  	int err = -1;
> @@ -753,7 +754,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
>  	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
>  		return -1;
>  
> -	skb = tcp_make_synack(sk, dst, req);
> +	skb = tcp_make_synack(sk, dst, req, rvp);
>  
>  	if (skb) {
>  		struct tcphdr *th = tcp_hdr(skb);
> @@ -774,9 +775,10 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
>  	return err;
>  }
>  
> -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
> +static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
> +			      struct request_values *rvp)
>  {
> -	return __tcp_v4_send_synack(sk, req, NULL);
> +	return __tcp_v4_send_synack(sk, NULL, req, rvp);
>  }
>  
>  /*
> @@ -1211,13 +1213,13 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
>  
>  int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
>  {
> -	struct inet_request_sock *ireq;
>  	struct tcp_options_received tmp_opt;
>  	struct request_sock *req;
> +	struct inet_request_sock *ireq;
> +	struct dst_entry *dst = NULL;
>  	__be32 saddr = ip_hdr(skb)->saddr;
>  	__be32 daddr = ip_hdr(skb)->daddr;
>  	__u32 isn = TCP_SKB_CB(skb)->when;
> -	struct dst_entry *dst = NULL;
>  #ifdef CONFIG_SYN_COOKIES
>  	int want_cookie = 0;
>  #else
> @@ -1337,7 +1339,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
>  	}
>  	tcp_rsk(req)->snt_isn = isn;
>  
> -	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
> +	if (__tcp_v4_send_synack(sk, dst, req, NULL)
> +	 || want_cookie)
>  		goto drop_and_free;
>  
>  	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 4be2228..7a42990 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -537,7 +537,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
>  		 * Enforce "SYN-ACK" according to figure 8, figure 6
>  		 * of RFC793, fixed by RFC1122.
>  		 */
> -		req->rsk_ops->rtx_syn_ack(sk, req);
> +		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
>  		return NULL;
>  	}
>  
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 616c686..1151cb8 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2224,7 +2224,8 @@ int tcp_send_synack(struct sock *sk)
>  
>  /* Prepare a SYN-ACK. */
>  struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
> -				struct request_sock *req)
> +				struct request_sock *req,
> +				struct request_values *rvp)
>  {
>  	struct inet_request_sock *ireq = inet_rsk(req);
>  	struct tcp_sock *tp = tcp_sk(sk);
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index de70909..3e327bc 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -461,7 +461,8 @@ out:
>  }
>  
>  
> -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
> +static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
> +			      struct request_values *rvp)
>  {
>  	struct inet6_request_sock *treq = inet6_rsk(req);
>  	struct ipv6_pinfo *np = inet6_sk(sk);
> @@ -499,7 +500,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
>  	if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
>  		goto done;
>  
> -	skb = tcp_make_synack(sk, dst, req);
> +	skb = tcp_make_synack(sk, dst, req, rvp);
>  	if (skb) {
>  		struct tcphdr *th = tcp_hdr(skb);
>  
> @@ -1161,13 +1162,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
>   */
>  static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
>  {
> +	struct tcp_options_received tmp_opt;
> +	struct request_sock *req;
>  	struct inet6_request_sock *treq;
>  	struct ipv6_pinfo *np = inet6_sk(sk);
> -	struct tcp_options_received tmp_opt;
>  	struct tcp_sock *tp = tcp_sk(sk);
> -	struct request_sock *req = NULL;
> -	__u32 isn = TCP_SKB_CB(skb)->when;
>  	struct dst_entry *dst = __sk_dst_get(sk);
> +	__u32 isn = TCP_SKB_CB(skb)->when;
>  #ifdef CONFIG_SYN_COOKIES
>  	int want_cookie = 0;
>  #else
> @@ -1239,23 +1240,20 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
>  
>  		isn = tcp_v6_init_sequence(skb);
>  	}
> -
>  	tcp_rsk(req)->snt_isn = isn;
>  
>  	security_inet_conn_request(sk, skb, req);
>  
> -	if (tcp_v6_send_synack(sk, req))
> -		goto drop;
> +	if (tcp_v6_send_synack(sk, req, NULL)
> +	 || want_cookie)

This fits to one line very well. There were other unnecessary line-breaks 
too (might have been in another patch than this).

However, in cases where you really would have to split lines in if 
statement. Please move || (or &&) to the end of the previous line and use 
matching indent level on the line below (in this example the text is 
matching already, so it is just about converting || into spaces here). 
...As was mentioned by somebody already earlier.

Splitting of 1-3 is quite ok, the rest haven't arrived yet.

> +		goto drop_and_free;
>  
> -	if (!want_cookie) {
> -		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
> -		return 0;
> -	}
> +	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
> +	return 0;
>  
> +drop_and_free:
> +	reqsk_free(req);
>  drop:
> -	if (req)
> -		reqsk_free(req);
> -
>  	return 0; /* don't send reset */
>  }
>  
> -- 
> 1.6.3.3
> 
-- 
 i.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS
  2009-11-13  4:03 [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange William Allen Simpson
                   ` (3 preceding siblings ...)
  2009-11-13  4:53 ` [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's William Allen Simpson
@ 2009-11-13  5:10 ` William Allen Simpson
  2009-11-13 14:11   ` Andi Kleen
  2009-11-18 15:03   ` William Allen Simpson
  2009-11-13  5:40 ` [net-next-2.6 PATCH v6 6/7 RFC] TCPCT part 1f: Initiator Cookie => Responder William Allen Simpson
  2009-11-13  5:53 ` [net-next-2.6 PATCH v6 7/7 RFC] TCPCT part 1g: Responder Cookie => Initiator William Allen Simpson
  6 siblings, 2 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13  5:10 UTC (permalink / raw)
  To: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 868 bytes --]

Provide per socket control of the TCP cookie option and SYN/SYNACK data.

This is a straightforward re-implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.

Allocations have been rearranged to avoid requiring GFP_ATOMIC.

Requires:
   net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
   TCPCT part 1d: define TCP cookie option, extend existing struct's

Signed-off-by: William.Allen.Simpson@gmail.com
---
  net/ipv4/tcp.c |  133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
  1 files changed, 131 insertions(+), 2 deletions(-)

[-- Attachment #2: TCPCT+1e6.patch --]
[-- Type: text/plain, Size: 4406 bytes --]

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ae01bf..3424499 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2079,8 +2079,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	int val;
 	int err = 0;
 
-	/* This is a string value all the others are int's */
-	if (optname == TCP_CONGESTION) {
+	/* These are data/string values, all the others are ints */
+	switch (optname) {
+	case TCP_CONGESTION: {
 		char name[TCP_CA_NAME_MAX];
 
 		if (optlen < 1)
@@ -2097,6 +2098,93 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		release_sock(sk);
 		return err;
 	}
+	case TCP_COOKIE_TRANSACTIONS: {
+		struct tcp_cookie_transactions ctd;
+		struct tcp_cookie_values *cvp = NULL;
+
+		if (sizeof(ctd) > optlen)
+			return -EINVAL;
+		if (copy_from_user(&ctd, optval, sizeof(ctd)))
+			return -EFAULT;
+
+		if (ctd.tcpct_used > sizeof(ctd.tcpct_value)
+		 || ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
+			return -EINVAL;
+
+		if (ctd.tcpct_cookie_desired == 0) {
+			/* default to global value */
+		} else if ((0x1 & ctd.tcpct_cookie_desired)
+			|| ctd.tcpct_cookie_desired > TCP_COOKIE_MAX
+			|| ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
+			return -EINVAL;
+		}
+
+		if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
+			/* Supercedes all other values */
+			lock_sock(sk);
+			if (tp->cookie_values != NULL) {
+				kref_put(&tp->cookie_values->kref,
+					 tcp_cookie_values_release);
+				tp->cookie_values = NULL;
+			}
+			tp->rx_opt.cookie_in_always = 0; /* false */
+			tp->rx_opt.cookie_out_never = 1; /* true */
+			release_sock(sk);
+			return err;
+		}
+
+		/* Allocate ancillary memory before locking.
+		 */
+		if (ctd.tcpct_used > 0
+		 || (tp->cookie_values == NULL
+		  && (sysctl_tcp_cookie_size > 0
+		   || ctd.tcpct_cookie_desired > 0
+		   || ctd.tcpct_s_data_desired > 0))) {
+			cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
+				      GFP_KERNEL);
+			if (cvp == NULL)
+				return -ENOMEM;
+		}
+		lock_sock(sk);
+		tp->rx_opt.cookie_in_always =
+			(TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
+		tp->rx_opt.cookie_out_never = 0; /* false */
+
+		if (tp->cookie_values != NULL) {
+			if (cvp != NULL) {
+				/* Changed values are recorded by a changed
+				 * pointer, ensuring the cookie will differ,
+				 * without separately hashing each value later.
+				 */
+				kref_put(&tp->cookie_values->kref,
+					 tcp_cookie_values_release);
+				kref_init(&cvp->kref);
+				tp->cookie_values = cvp;
+			} else {
+				cvp = tp->cookie_values;
+			}
+		}
+		if (cvp != NULL) {
+			cvp->cookie_desired = ctd.tcpct_cookie_desired;
+
+			if (ctd.tcpct_used > 0) {
+				memcpy(cvp->s_data_payload, ctd.tcpct_value,
+				       ctd.tcpct_used);
+				cvp->s_data_desired = ctd.tcpct_used;
+				cvp->s_data_constant = 1; /* true */
+			} else {
+				/* No constant payload data. */
+				cvp->s_data_desired = ctd.tcpct_s_data_desired;
+				cvp->s_data_constant = 0; /* false */
+			}
+		}
+		release_sock(sk);
+		return err;
+	}
+	default:
+		/* fallthru */
+		break;
+	};
 
 	if (optlen < sizeof(int))
 		return -EINVAL;
@@ -2421,6 +2509,47 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
 			return -EFAULT;
 		return 0;
+
+	case TCP_COOKIE_TRANSACTIONS: {
+		struct tcp_cookie_transactions ctd;
+		struct tcp_cookie_values *cvp = tp->cookie_values;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+		if (len < sizeof(ctd))
+			return -EINVAL;
+
+		memset(&ctd, 0, sizeof(ctd));
+		ctd.tcpct_flags = (tp->rx_opt.cookie_in_always
+				   ? TCP_COOKIE_IN_ALWAYS : 0)
+				+ (tp->rx_opt.cookie_out_never
+				   ? TCP_COOKIE_OUT_NEVER : 0);
+
+		if (cvp != NULL) {
+			ctd.tcpct_flags += (cvp->s_data_in
+					    ? TCP_S_DATA_IN : 0)
+					 + (cvp->s_data_out
+					    ? TCP_S_DATA_OUT : 0);
+
+			ctd.tcpct_cookie_desired = cvp->cookie_desired;
+			ctd.tcpct_s_data_desired = cvp->s_data_desired;
+
+			/* Cookie(s) saved, return as nonce */
+			if (sizeof(ctd.tcpct_value) < cvp->cookie_pair_size) {
+				/* impossible? */
+				return -EINVAL;
+			}
+			memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
+			       cvp->cookie_pair_size);
+			ctd.tcpct_used = cvp->cookie_pair_size;
+		}
+
+		if (put_user(sizeof(ctd), optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &ctd, sizeof(ctd)))
+			return -EFAULT;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [net-next-2.6 PATCH v6 6/7 RFC] TCPCT part 1f: Initiator Cookie => Responder
  2009-11-13  4:03 [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange William Allen Simpson
                   ` (4 preceding siblings ...)
  2009-11-13  5:10 ` [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
@ 2009-11-13  5:40 ` William Allen Simpson
  2009-11-13 16:51   ` William Allen Simpson
  2009-11-13  5:53 ` [net-next-2.6 PATCH v6 7/7 RFC] TCPCT part 1g: Responder Cookie => Initiator William Allen Simpson
  6 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13  5:40 UTC (permalink / raw)
  To: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 783 bytes --]

Calculate and format TCP_COOKIE SYN option.

Create (and destroy) cookie data structures.

This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

Requires:
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
   TCPCT part 1d: define TCP cookie option, extend existing struct's

Signed-off-by: William.Allen.Simpson@gmail.com
---
  net/ipv4/tcp_ipv4.c      |   20 +++++
  net/ipv4/tcp_minisocks.c |   45 +++++++++--
  net/ipv4/tcp_output.c    |  182 ++++++++++++++++++++++++++++++++++++++++------
  net/ipv6/tcp_ipv6.c      |   13 +++
  4 files changed, 228 insertions(+), 32 deletions(-)

[-- Attachment #2: TCPCT+1f6.patch --]
[-- Type: text/plain, Size: 13998 bytes --]

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 094231b..2ae1985 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1834,6 +1834,19 @@ static int tcp_v4_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv4_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
@@ -1887,6 +1900,13 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
+	/* TCP Cookie Transactions */
+	if (tp->cookie_values != NULL) {
+		kref_put(&tp->cookie_values->kref,
+			 tcp_cookie_values_release);
+		tp->cookie_values = NULL;
+	}
+
 	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a42990..2b50da8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -389,14 +389,42 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		const struct inet_request_sock *ireq = inet_rsk(req);
 		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
-		struct tcp_sock *newtp;
+		struct tcp_sock *newtp = tcp_sk(newsk);
+		struct tcp_sock *oldtp = tcp_sk(sk);
+		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
+
+		/* TCP Cookie Transactions require space for the cookie pair,
+		 * as it differs for each connection.  There is no need to
+		 * copy any s_data stored at the original socket.  Failure
+		 * will prevent resuming the connection.
+		 *
+		 * Presumed copied, in order of appearance:
+		 *	cookie_in_always, cookie_out_never
+		 */
+		if (oldcvp != NULL) {
+			struct tcp_cookie_values *newcvp =
+				kzalloc(sizeof(*newtp->cookie_values),
+					GFP_ATOMIC);
+
+			if (newcvp != NULL) {
+				kref_init(&newcvp->kref);
+				newcvp->cookie_desired =
+						oldcvp->cookie_desired;
+				newtp->cookie_values = newcvp;
+			} else {
+				/* Not Yet Implemented */
+				newtp->cookie_values = NULL;
+			}
+		}
 
 		/* Now setup tcp_sock */
-		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
-		newtp->snd_up = treq->snt_isn + 1;
+
+		newtp->rcv_wup = newtp->copied_seq =
+		newtp->rcv_nxt = treq->rcv_isn + 1;
+
+		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt =
+		newtp->snd_up = treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		tcp_prequeue_init(newtp);
 
@@ -429,8 +457,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->write_seq = treq->snt_isn + 1;
-		newtp->pushed_seq = newtp->write_seq;
+		newtp->write_seq = newtp->pushed_seq =
+			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -596,7 +624,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 * Invalid ACK: reset will be sent by listening socket
 	 */
 	if ((flg & TCP_FLAG_ACK) &&
-	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
+	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1 +
+					 tcp_s_data_size(tcp_sk(sk))))
 		return sk;
 
 	/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e59fa5a..e1553d3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -370,15 +370,45 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_TS		(1 << 1)
 #define OPTION_MD5		(1 << 2)
 #define OPTION_WSCALE		(1 << 3)
+#define OPTION_COOKIE_EXTENSION	(1 << 4)
 
 struct tcp_out_options {
 	u8 options;		/* bit field of OPTION_* */
 	u8 ws;			/* window scale, 0 to disable */
+	u8 hash_size;		/* bytes in hash */
 	u8 num_sack_blocks;	/* number of SACK blocks to include */
 	u16 mss;		/* 0 to disable */
 	__u32 tsval, tsecr;	/* need to include OPTION_TS */
+	__u8 *hash_location;	/* temporary pointer, overloaded */
 };
 
+/* The sysctl int routines are generic, so check consistency here.
+ */
+static u8 tcp_cookie_size_check(u8 desired)
+{
+	if (desired > 0) {
+		/* previously specified */
+		return desired;
+	}
+	if (sysctl_tcp_cookie_size <= 0) {
+		/* no default specified */
+		return 0;
+	}
+	if (sysctl_tcp_cookie_size < TCP_COOKIE_MIN) {
+		/* value too small, increase to minimum */
+		return TCP_COOKIE_MIN;
+	}
+	if (sysctl_tcp_cookie_size > TCP_COOKIE_MAX) {
+		/* value too large, decrease to maximum */
+		return TCP_COOKIE_MAX;
+	}
+	if (0x1 & sysctl_tcp_cookie_size) {
+		/* 8-bit multiple, illegal, fix it */
+		return (u8)(sysctl_tcp_cookie_size + 0x1);
+	}
+	return (u8)sysctl_tcp_cookie_size;
+}
+
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -393,17 +423,34 @@ struct tcp_out_options {
  * (but it may well be that other scenarios fail similarly).
  */
 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
-			      const struct tcp_out_options *opts,
-			      __u8 **md5_hash) {
-	if (unlikely(OPTION_MD5 & opts->options)) {
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_MD5SIG << 8) |
-			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *)ptr;
+			      struct tcp_out_options *opts)
+{
+	u8 options = opts->options;	/* mungable copy */
+
+	/* Having both authentication and cookies for security is redundant,
+	 * and there's certainly not enough room.  Instead, the cookie-less
+	 * extension variant is proposed.
+	 *
+	 * Consider the pessimal case with authentication.  The options
+	 * could look like:
+	 *   COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
+	 */
+	if (unlikely(OPTION_MD5 & options)) {
+		if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
+			*ptr++ = htonl((TCPOPT_COOKIE << 24) |
+				       (TCPOLEN_COOKIE_BASE << 16) |
+				       (TCPOPT_MD5SIG << 8) |
+				       TCPOLEN_MD5SIG);
+		} else {
+			*ptr++ = htonl((TCPOPT_NOP << 24) |
+				       (TCPOPT_NOP << 16) |
+				       (TCPOPT_MD5SIG << 8) |
+				       TCPOLEN_MD5SIG);
+		}
+		options &= ~OPTION_COOKIE_EXTENSION;
+		/* overload cookie hash location */
+		opts->hash_location = (__u8 *)ptr;
 		ptr += 4;
-	} else {
-		*md5_hash = NULL;
 	}
 
 	if (unlikely(opts->mss)) {
@@ -412,12 +459,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			       opts->mss);
 	}
 
-	if (likely(OPTION_TS & opts->options)) {
-		if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
+	if (likely(OPTION_TS & options)) {
+		if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
 				       (TCPOLEN_SACK_PERM << 16) |
 				       (TCPOPT_TIMESTAMP << 8) |
 				       TCPOLEN_TIMESTAMP);
+			options &= ~OPTION_SACK_ADVERTISE;
 		} else {
 			*ptr++ = htonl((TCPOPT_NOP << 24) |
 				       (TCPOPT_NOP << 16) |
@@ -428,15 +476,52 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		*ptr++ = htonl(opts->tsecr);
 	}
 
-	if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
-		     !(OPTION_TS & opts->options))) {
+	/* Specification requires after timestamp, so do it now.
+	 *
+	 * Consider the pessimal case without authentication.  The options
+	 * could look like:
+	 *   MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
+	 */
+	if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
+		__u8 *cookie_copy = opts->hash_location;
+		u8 cookie_size = opts->hash_size;
+
+		if (unlikely(0x1 & cookie_size)) {
+			/* 8-bit multiple, illegal, ignore */
+			cookie_size = 0;
+		} else if (likely(0x2 & cookie_size)) {
+			__u8 *p = (__u8 *)ptr;
+
+			/* 16-bit multiple */
+			*p++ = TCPOPT_COOKIE;
+			*p++ = TCPOLEN_COOKIE_BASE + cookie_size;
+			*p++ = *cookie_copy++;
+			*p++ = *cookie_copy++;
+			ptr++;
+			cookie_size -= 2;
+		} else {
+			/* 32-bit multiple */
+			*ptr++ = htonl(((TCPOPT_NOP << 24) |
+					(TCPOPT_NOP << 16) |
+					(TCPOPT_COOKIE << 8) |
+					TCPOLEN_COOKIE_BASE) +
+				       cookie_size);
+		}
+
+		if (cookie_size > 0) {
+			memcpy(ptr, cookie_copy, cookie_size);
+			ptr += (cookie_size >> 2);
+		}
+	}
+
+	if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_SACK_PERM << 8) |
 			       TCPOLEN_SACK_PERM);
 	}
 
-	if (unlikely(OPTION_WSCALE & opts->options)) {
+	if (unlikely(OPTION_WSCALE & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_WINDOW << 16) |
 			       (TCPOLEN_WINDOW << 8) |
@@ -471,8 +556,12 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_out_options *opts,
 				struct tcp_md5sig_key **md5) {
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned size = 0;
+	struct tcp_cookie_values *cvp = tp->cookie_values;
 	struct dst_entry *dst = __sk_dst_get(sk);
+	unsigned size = 0;
+	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL)
+			 ? tcp_cookie_size_check(cvp->cookie_desired)
+			 : 0;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -517,6 +606,53 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 			size += TCPOLEN_SACKPERM_ALIGNED;
 	}
 
+	/* Note that timestamps are required by the specification.
+	 *
+	 * Odd numbers of bytes are prohibited by the specification, ensuring
+	 * that the cookie is 16-bit aligned, and the resulting cookie pair is
+	 * 32-bit aligned.
+	 */
+	if (*md5 == NULL
+	 && (OPTION_TS & opts->options)
+	 && cookie_size > 0) {
+		int need = TCPOLEN_COOKIE_BASE + cookie_size;
+		int remaining = MAX_TCP_OPTION_SPACE - size;
+
+		if (0x2 & need) {
+			/* 32-bit multiple */
+			need += 2; /* NOPs */
+
+			if (need > remaining) {
+				/* try shrinking cookie to fit */
+				cookie_size -= 2;
+				need -= 4;
+			}
+		}
+		while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
+			cookie_size -= 4;
+			need -= 4;
+		}
+		if (TCP_COOKIE_MIN <= cookie_size) {
+			opts->options |= OPTION_COOKIE_EXTENSION;
+			opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
+			opts->hash_size = cookie_size;
+
+			/* Remember for future incarnations. */
+			cvp->cookie_desired = cookie_size;
+
+			if (cvp->cookie_desired != cvp->cookie_pair_size) {
+				/* Currently use random bytes as a nonce,
+				 * assuming these are completely unpredictable
+				 * by hostile users of the same system.
+				 */
+				get_random_bytes(&cvp->cookie_pair[0],
+						 cookie_size);
+				cvp->cookie_pair_size = cookie_size;
+			}
+
+			size += need;
+		}
+	}
 	return size;
 }
 
@@ -632,7 +768,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	struct tcp_out_options opts;
 	unsigned tcp_options_size, tcp_header_size;
 	struct tcp_md5sig_key *md5;
-	__u8 *md5_hash_location;
 	struct tcphdr *th;
 	int err;
 
@@ -703,7 +838,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		}
 	}
 
-	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
 		TCP_ECN_send(sk, skb, tcp_header_size);
 
@@ -711,7 +846,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-		tp->af_specific->calc_md5_hash(md5_hash_location,
+		tp->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, sk, NULL, skb);
 	}
 #endif
@@ -2235,14 +2370,13 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct request_values *rvp)
 {
+	struct tcp_out_options opts;
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
-	int tcp_header_size;
-	struct tcp_out_options opts;
 	struct sk_buff *skb;
 	struct tcp_md5sig_key *md5;
-	__u8 *md5_hash_location;
+	int tcp_header_size;
 	int mss;
 
 	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
@@ -2303,14 +2437,14 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(min(req->rcv_wnd, 65535U));
-	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	th->doff = (tcp_header_size >> 2);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */
 	if (md5) {
-		tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location,
+		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, NULL, req, skb);
 	}
 #endif
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3e327bc..2b16f9a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1865,6 +1865,19 @@ static int tcp_v6_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv6_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [net-next-2.6 PATCH v6 7/7 RFC] TCPCT part 1g: Responder Cookie => Initiator
  2009-11-13  4:03 [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange William Allen Simpson
                   ` (5 preceding siblings ...)
  2009-11-13  5:40 ` [net-next-2.6 PATCH v6 6/7 RFC] TCPCT part 1f: Initiator Cookie => Responder William Allen Simpson
@ 2009-11-13  5:53 ` William Allen Simpson
  6 siblings, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13  5:53 UTC (permalink / raw)
  To: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 1264 bytes --]

Calculate and format TCP_COOKIE SYNACK option.

Process optional SYNACK data.

This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
   TCPCT part 1d: define TCP cookie option, extend existing struct's
   TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS
   TCPCT part 1f: Initiator Cookie => Responder

Signed-off-by: William.Allen.Simpson@gmail.com
---
  include/linux/tcp.h      |   11 ++++++
  include/net/tcp.h        |    1 +
  net/ipv4/syncookies.c    |    5 ++-
  net/ipv4/tcp_input.c     |   84 ++++++++++++++++++++++++++++++++++++++++------
  net/ipv4/tcp_ipv4.c      |   46 +++++++++++++++++++++++--
  net/ipv4/tcp_minisocks.c |   14 ++++---
  net/ipv4/tcp_output.c    |   75 ++++++++++++++++++++++++++++++++++++++--
  net/ipv6/syncookies.c    |    5 ++-
  net/ipv6/tcp_ipv6.c      |   51 +++++++++++++++++++++++++++-
  9 files changed, 261 insertions(+), 31 deletions(-)

[-- Attachment #2: TCPCT+1g6.patch --]
[-- Type: text/plain, Size: 18598 bytes --]

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 6c5ff66..bbedc80 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -223,6 +223,17 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 	return (tcp_hdr(skb)->doff - 5) * 4;
 }
 
+static inline unsigned int tcp_header_len_th(const struct tcphdr *th)
+{
+	return th->doff * 4;
+}
+
+/* When doff is bad, this could be negative. */
+static inline int tcp_option_len_th(const struct tcphdr *th)
+{
+	return (int)(th->doff * 4) - sizeof(*th);
+}
+
 /* This defines a selective acknowledgement block. */
 struct tcp_sack_block_wire {
 	__be32	start_seq;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3a4c840..14796c4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -407,6 +407,7 @@ extern int			tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
 
 extern void			tcp_parse_options(struct sk_buff *skb,
 						  struct tcp_options_received *opt_rx,
+						  u8 **hvpp,
 						  int estab,
 						  struct dst_entry *dst);
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 3146cc4..26399ad 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -253,6 +253,8 @@ EXPORT_SYMBOL(cookie_check_timestamp);
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt)
 {
+	struct tcp_options_received tcp_opt;
+	u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -263,7 +265,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
-	struct tcp_options_received tcp_opt;
 
 	if (!sysctl_tcp_syncookies || !th->ack)
 		goto out;
@@ -341,7 +342,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst);
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst);
 
 	if (tcp_opt.saw_tstamp)
 		cookie_check_timestamp(&tcp_opt);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc306ac..104fe2d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3698,7 +3698,7 @@ old_ack:
  * the fast version below fails.
  */
 void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       int estab,  struct dst_entry *dst)
+		       u8 **hvpp, int estab,  struct dst_entry *dst)
 {
 	unsigned char *ptr;
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3785,6 +3785,19 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				 */
 				break;
 #endif
+			case TCPOPT_COOKIE:
+				/* This option carries 3 different lengths.
+				 */
+				if (TCPOLEN_COOKIE_MAX >= opsize
+				 && TCPOLEN_COOKIE_MIN <= opsize) {
+					opt_rx->cookie_plus = opsize;
+					*hvpp = ptr;
+				} else if (TCPOLEN_COOKIE_PAIR == opsize) {
+					/* not yet implemented */
+				} else if (TCPOLEN_COOKIE_BASE == opsize) {
+					/* not yet implemented */
+				}
+				break;
 			}
 
 			ptr += opsize-2;
@@ -3813,17 +3826,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
  * If it is wrong it falls back on tcp_parse_options().
  */
 static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-				  struct tcp_sock *tp)
+				  struct tcp_sock *tp, u8 **hvpp)
 {
-	if (th->doff == sizeof(struct tcphdr) >> 2) {
+	/* In the spirit of fast parsing, compare doff directly to constant
+	 * values.  Because equality is used, short doff can be ignored here.
+	 */
+	if (th->doff == (sizeof(*th) / 4)) {
 		tp->rx_opt.saw_tstamp = 0;
 		return 0;
 	} else if (tp->rx_opt.tstamp_ok &&
-		   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+		   th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
 		if (tcp_parse_aligned_timestamp(tp, th))
 			return 1;
 	}
-	tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
+	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
 	return 1;
 }
 
@@ -5077,11 +5093,13 @@ out:
 static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 			      struct tcphdr *th, int syn_inerr)
 {
+	u8 *hash_location;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* RFC1323: H1. Apply PAWS check first. */
-	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-	    tcp_paws_discard(sk, skb)) {
+	if (tcp_fast_parse_options(skb, th, tp, &hash_location)
+	 && tp->rx_opt.saw_tstamp
+	 && tcp_paws_discard(sk, skb)) {
 		if (!th->rst) {
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
 			tcp_send_dupack(sk, skb);
@@ -5368,12 +5386,15 @@ discard:
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 struct tcphdr *th, unsigned len)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	int saved_clamp = tp->rx_opt.mss_clamp;
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
+	struct tcp_cookie_values *cvp = tp->cookie_values;
+	int saved_clamp = tp->rx_opt.mss_clamp;
+	int queued = 0;
 
-	tcp_parse_options(skb, &tp->rx_opt, 0, dst);
+	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst);
 
 	if (th->ack) {
 		/* rfc793:
@@ -5470,6 +5491,44 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * Change state from SYN-SENT only after copied_seq
 		 * is initialized. */
 		tp->copied_seq = tp->rcv_nxt;
+
+		if (cvp != NULL
+		 && cvp->cookie_pair_size > 0
+		 && tp->rx_opt.cookie_plus > 0) {
+			int cookie_size = tp->rx_opt.cookie_plus
+					- TCPOLEN_COOKIE_BASE;
+			int cookie_pair_size = cookie_size
+					     + cvp->cookie_desired;
+
+			/* A cookie extension option was sent and returned.
+			 * Note that each incoming SYNACK replaces the
+			 * Responder cookie.  The initial exchange is most
+			 * fragile, as protection against spoofing relies
+			 * entirely upon the sequence and timestamp (above).
+			 * This replacement strategy allows the correct pair to
+			 * pass through, while any others will be filtered via
+			 * Responder verification later.
+			 */
+			if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
+				memcpy(&cvp->cookie_pair[cvp->cookie_desired],
+				       hash_location, cookie_size);
+				cvp->cookie_pair_size = cookie_pair_size;
+			}
+
+			if (tcp_header_len_th(th) < skb->len) {
+				/* Queue incoming transaction data. */
+				__skb_pull(skb, tcp_header_len_th(th));
+				__skb_queue_tail(&sk->sk_receive_queue, skb);
+				skb_set_owner_r(skb, sk);
+				sk->sk_data_ready(sk, 0);
+				cvp->s_data_in = 1; /* true */
+				queued = 1; /* should be amount? */
+				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+				tp->rcv_wup = TCP_SKB_CB(skb)->end_seq;
+				tp->copied_seq = TCP_SKB_CB(skb)->seq + 1;
+			}
+		}
+
 		smp_mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
 
@@ -5521,11 +5580,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
 
 discard:
-			__kfree_skb(skb);
+			if (queued == 0)
+				__kfree_skb(skb);
 			return 0;
 		} else {
 			tcp_send_ack(sk);
 		}
+		if (queued > 0)
+			return 0; /* amount queued? */
 		return -1;
 	}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2ae1985..1597b66 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1213,9 +1213,12 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
+	u8 *hash_location;
 	struct request_sock *req;
 	struct inet_request_sock *ireq;
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct dst_entry *dst = NULL;
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
@@ -1271,15 +1274,49 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
-	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
+	tmp_opt.user_mss  = tp->rx_opt.user_mss;
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+
+	if (tmp_opt.cookie_plus > 0
+	 && tmp_opt.saw_tstamp
+	 && !tp->rx_opt.cookie_out_never
+	 && (sysctl_tcp_cookie_size > 0
+	  || (tp->cookie_values != NULL
+	   && tp->cookie_values->cookie_desired > 0))) {
+		u8 *c;
+		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
+		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
+
+		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
+			goto drop_and_release;
+
+		/* Secret recipe starts with IP addresses */
+		*mess++ ^= daddr;
+		*mess++ ^= saddr;
 
-	tcp_parse_options(skb, &tmp_opt, 0, dst);
+		/* plus variable length Initiator Cookie */
+		c = (u8 *)mess;
+		while (l-- > 0)
+			*c++ ^= *hash_location++;
+
+#ifdef CONFIG_SYN_COOKIES
+		want_cookie = 0;	/* not our kind of cookie */
+#endif
+		tmp_ext.cookie_out_never = 0; /* false */
+		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
+	} else if (!tp->rx_opt.cookie_in_always) {
+		/* redundant indications, but ensure initialization. */
+		tmp_ext.cookie_out_never = 1; /* true */
+		tmp_ext.cookie_plus = 0;
+	} else {
+		goto drop_and_release;
+	}
+	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
 
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
-
 	tcp_openreq_init(req, &tmp_opt, skb);
 
 	if (security_inet_conn_request(sk, skb, req))
@@ -1339,7 +1376,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_rsk(req)->snt_isn = isn;
 
-	if (__tcp_v4_send_synack(sk, dst, req, NULL)
+	if (__tcp_v4_send_synack(sk, dst, req,
+				 (struct request_values *)&tmp_ext)
 	 || want_cookie)
 		goto drop_and_free;
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 2b50da8..7b01ea0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -96,13 +96,14 @@ enum tcp_tw_status
 tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			   const struct tcphdr *th)
 {
-	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	struct tcp_options_received tmp_opt;
+	u8 *hash_location;
+	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	int paws_reject = 0;
 
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
 		tmp_opt.tstamp_ok = 1;
-		tcp_parse_options(skb, &tmp_opt, 1, NULL);
+		tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent	= tcptw->tw_ts_recent;
@@ -523,15 +524,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req,
 			   struct request_sock **prev)
 {
+	struct tcp_options_received tmp_opt;
+	u8 *hash_location;
+	struct sock *child;
 	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
-	struct tcp_options_received tmp_opt;
-	struct sock *child;
 
-	if ((th->doff > (sizeof(struct tcphdr)>>2)) && (req->ts_recent)) {
+	if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) {
 		tmp_opt.tstamp_ok = 1;
-		tcp_parse_options(skb, &tmp_opt, 1, NULL);
+		tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e1553d3..18708d3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -661,9 +661,14 @@ static unsigned tcp_synack_options(struct sock *sk,
 				   struct request_sock *req,
 				   unsigned mss, struct sk_buff *skb,
 				   struct tcp_out_options *opts,
-				   struct tcp_md5sig_key **md5) {
-	unsigned size = 0;
+				   struct tcp_md5sig_key **md5,
+				   struct tcp_extend_values *xvp)
+{
 	struct inet_request_sock *ireq = inet_rsk(req);
+	unsigned size = 0;
+	u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never)
+			 ? xvp->cookie_plus
+			 : 0;
 	char doing_ts;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -702,6 +707,28 @@ static unsigned tcp_synack_options(struct sock *sk,
 			size += TCPOLEN_SACKPERM_ALIGNED;
 	}
 
+	/* Similar rationale to tcp_syn_options() applies here, too.
+	 * If the <SYN> options fit, the same options should fit now!
+	 */
+	if (*md5 == NULL
+	 && doing_ts
+	 && cookie_plus > TCPOLEN_COOKIE_BASE) {
+		int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
+		int remaining = MAX_TCP_OPTION_SPACE - size;
+
+		if (0x2 & need) {
+			/* 32-bit multiple */
+			need += 2; /* NOPs */
+		}
+		if (need <= remaining) {
+			opts->options |= OPTION_COOKIE_EXTENSION;
+			opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
+			size += need;
+		} else {
+			/* There's no error return, so flag it. */
+			xvp->cookie_out_never = 1; /* true */
+		}
+	}
 	return size;
 }
 
@@ -2371,6 +2398,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_values *rvp)
 {
 	struct tcp_out_options opts;
+	struct tcp_extend_values *xvp = tcp_xv(rvp);
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
@@ -2414,8 +2442,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 #endif
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	tcp_header_size = tcp_synack_options(sk, req, mss,
-					     skb, &opts, &md5) +
-			  sizeof(struct tcphdr);
+					     skb, &opts, &md5, xvp)
+			+ sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -2432,6 +2460,45 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	 */
 	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
 			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
+
+	if (OPTION_COOKIE_EXTENSION & opts.options) {
+		const struct tcp_cookie_values *cvp = tp->cookie_values;
+
+		if (cvp != NULL
+		 && cvp->s_data_constant
+		 && cvp->s_data_desired > 0) {
+			u8 *buf = skb_put(skb, cvp->s_data_desired);
+
+			/* copy data directly from the listening socket. */
+			memcpy(buf, cvp->s_data_payload, cvp->s_data_desired);
+			TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired;
+		}
+
+		if (opts.hash_size > 0) {
+			__u32 workspace[SHA_WORKSPACE_WORDS];
+			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
+			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
+
+			/* Secret recipe depends on the Timestamp, (future)
+			 * Sequence and Acknowledgment Numbers, Initiator
+			 * Cookie, and others handled by IP variant caller.
+			 */
+			*tail-- ^= opts.tsval;
+			*tail-- ^= tcp_rsk(req)->rcv_isn + 1;
+			*tail-- ^= TCP_SKB_CB(skb)->seq + 1;
+
+			/* recommended */
+			*tail-- ^= ((th->dest << 16) | th->source);
+			*tail-- ^= (u32)cvp; /* per sockopt */
+
+			sha_transform((__u32 *)&xvp->cookie_bakery[0],
+				      (char *)mess,
+				      &workspace[0]);
+			opts.hash_location =
+				(__u8 *)&xvp->cookie_bakery[0];
+		}
+	}
+
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 612fc53..5b9af50 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -159,6 +159,8 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 
 struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_options_received tcp_opt;
+	u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct inet6_request_sock *ireq6;
 	struct tcp_request_sock *treq;
@@ -171,7 +173,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	int mss;
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
-	struct tcp_options_received tcp_opt;
 
 	if (!sysctl_tcp_syncookies || !th->ack)
 		goto out;
@@ -254,7 +255,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, 0, dst);
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst);
 
 	if (tcp_opt.saw_tstamp)
 		cookie_check_timestamp(&tcp_opt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b16f9a..5005ba2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1162,7 +1162,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
+	u8 *hash_location;
 	struct request_sock *req;
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1206,8 +1208,52 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+
+	if (tmp_opt.cookie_plus > 0
+	 && tmp_opt.saw_tstamp
+	 && !tp->rx_opt.cookie_out_never
+	 && (sysctl_tcp_cookie_size > 0
+	  || (tp->cookie_values != NULL
+	   && tp->cookie_values->cookie_desired > 0))) {
+		u8 *c;
+		u32 *d;
+		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
+		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
+
+		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
+			goto drop_and_free;
+
+		/* Secret recipe starts with IP addresses */
+		d = &ipv6_hdr(skb)->daddr.s6_addr32[0];
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		d = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+
+		/* plus variable length Initiator Cookie */
+		c = (u8 *)mess;
+		while (l-- > 0)
+			*c++ ^= *hash_location++;
 
-	tcp_parse_options(skb, &tmp_opt, 0, dst);
+#ifdef CONFIG_SYN_COOKIES
+		want_cookie = 0;	/* not our kind of cookie */
+#endif
+		tmp_ext.cookie_out_never = 0; /* false */
+		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
+	} else if (!tp->rx_opt.cookie_in_always) {
+		/* redundant indications, but ensure initialization. */
+		tmp_ext.cookie_out_never = 1; /* true */
+		tmp_ext.cookie_plus = 0;
+	} else {
+		goto drop_and_free;
+	}
+	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -1244,7 +1290,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	security_inet_conn_request(sk, skb, req);
 
-	if (tcp_v6_send_synack(sk, req, NULL)
+	if (tcp_v6_send_synack(sk, req,
+			       (struct request_values *)&tmp_ext)
 	 || want_cookie)
 		goto drop_and_free;
 
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-13  4:17 ` [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie William Allen Simpson
@ 2009-11-13  6:21   ` Eric Dumazet
  2009-11-13 14:35     ` William Allen Simpson
  2009-11-13  6:26   ` Joe Perches
  1 sibling, 1 reply; 42+ messages in thread
From: Eric Dumazet @ 2009-11-13  6:21 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: Linux Kernel Network Developers

William Allen Simpson a écrit :
> Define (missing) hash message size for SHA1.
> 
> Define hashing size constants specific to TCP cookies, and add new
> function.
> 
> Maintain global secret values for tcp_cookie_generator().
> 
> This is a significantly revised implementation of earlier (15-year-old)
> Photuris [RFC-2522] code for the KA9Q cooperative multitasking platform.
> 
> Linux RCU technique appears to be well-suited to this application, though
> neither of the circular queue items are freed.
> 
> These functions will also be used in subsequent patches that implement
> additional features.
> 
> Signed-off-by: William.Allen.Simpson@gmail.com
> ---
>  include/linux/cryptohash.h |    1 +
>  include/net/tcp.h          |    8 +++
>  net/ipv4/tcp.c             |  146
> ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 155 insertions(+), 0 deletions(-)
> 

Small point :

+	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
+		spin_lock_bh(&tcp_secret_locker);
+		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
+			/* refreshed by another */
+			spin_unlock_bh(&tcp_secret_locker);
+			memcpy(bakery,
+			       &tcp_secret_generating->secrets[0],
+			       sizeof(tcp_secret_generating->secrets));

Technically speaking, you should perform the memcpy() before spin_unlock_bh()

+	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
+		spin_lock_bh(&tcp_secret_locker);
+		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
+			/* refreshed by another */
+			memcpy(bakery,
+			       &tcp_secret_generating->secrets[0],
+			       sizeof(tcp_secret_generating->secrets));
+			spin_unlock_bh(&tcp_secret_locker);


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-13  4:17 ` [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie William Allen Simpson
  2009-11-13  6:21   ` Eric Dumazet
@ 2009-11-13  6:26   ` Joe Perches
  2009-11-13 14:51     ` William Allen Simpson
  1 sibling, 1 reply; 42+ messages in thread
From: Joe Perches @ 2009-11-13  6:26 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: Linux Kernel Network Developers

On Thu, 2009-11-12 at 23:17 -0500, William Allen Simpson wrote:
> +int tcp_cookie_generator(u32 *bakery)
> +{
> +	unsigned long jiffy = jiffies;
> +
> +	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
> +		spin_lock_bh(&tcp_secret_locker);
> +		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
> +			/* refreshed by another */
> +			spin_unlock_bh(&tcp_secret_locker);
> +			memcpy(bakery,
> +			       &tcp_secret_generating->secrets[0],
> +			       sizeof(tcp_secret_generating->secrets));
> +		} else {
> +			u32 secrets[COOKIE_WORKSPACE_WORDS];

Rather than using stack, could you use
u32 *secrets = &tcp_secret_secondary->secrets[0];



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-13  4:53 ` [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's William Allen Simpson
@ 2009-11-13  6:32   ` Eric Dumazet
  2009-11-13 16:06     ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: Eric Dumazet @ 2009-11-13  6:32 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: Linux Kernel Network Developers

William Allen Simpson a écrit :
> Data structures are carefully composed to require minimal additions.
> For example, the struct tcp_options_received cookie_plus variable fits
> between existing 16-bit and 8-bit variables, requiring no additional
> space (taking alignment into consideration).  There are no additions to
> tcp_request_sock, and only 1 pointer in tcp_sock.
> 
> This is a significantly revised implementation of an earlier (year-old)
> patch that no longer applies cleanly, with permission of the original
> author (Adam Langley):
> 
>    http://thread.gmane.org/gmane.linux.network/102586
> 
> The principle difference is using a TCP option to carry the cookie nonce,
> instead of a user configured offset in the data.  This is more flexible and
> less subject to user configuration error.  Such a cookie option has been
> suggested for many years, and is also useful without SYN data, allowing
> several related concepts to use the same extension option.
> 
>    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
>    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
> 
>    "Re: what a new TCP header might look like", May 12, 1998.
>    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
> 
> These functions will also be used in subsequent patches that implement
> additional features.
> 
> Requires:
>   TCPCT part 1a: add request_values parameter for sending SYNACK
>   TCPCT part 1b: generate Responder Cookie
>   TCPCT part 1c: sysctl_tcp_cookie_size, socket option
> TCP_COOKIE_TRANSACTIONS
> 
> Signed-off-by: William.Allen.Simpson@gmail.com
> ---
>  include/linux/tcp.h |   29 ++++++++++++++++----
>  include/net/tcp.h   |   72
> +++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 95 insertions(+), 6 deletions(-)
> 

+/**
+ * A tcp_sock contains a pointer to the current value, and this is cloned to
+ * the tcp_timewait_sock.
+ *
+ * @cookie_pair:	variable data from the option exchange.
+ *
+ * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
+ *			indicates default (sysctl_tcp_cookie_size).
+ *			After cookie sent, remembers size of cookie.
+ *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
+ *
+ * @s_data_desired:	user specified tcpct_s_data_desired.  When the
+ *			constant payload is specified (@s_data_constant),
+ *			holds its length instead.
+ *			Range 0 to TCP_MSS_DESIRED.
+ *
+ * @s_data_payload:	constant data that is to be included in the
+ *			payload of SYN or SYNACK segments when the
+ *			cookie option is present.
+ */

Thanks for this kerneldoc William ;)

But header should be :
/**
 *	struct tcp_cookie_values - Some description...



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS
  2009-11-13  5:10 ` [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
@ 2009-11-13 14:11   ` Andi Kleen
  2009-11-13 16:32     ` William Allen Simpson
  2009-11-18 15:03   ` William Allen Simpson
  1 sibling, 1 reply; 42+ messages in thread
From: Andi Kleen @ 2009-11-13 14:11 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: Linux Kernel Network Developers

William Allen Simpson <william.allen.simpson@gmail.com> writes:

> This is a straightforward re-implementation of an earlier (year-old)
> patch that no longer applies cleanly, with permission of the original
> author (Adam Langley):
>
>    http://thread.gmane.org/gmane.linux.network/102586
>
> The principle difference is using a TCP option to carry the cookie nonce,
> instead of a user configured offset in the data.
>
> Allocations have been rearranged to avoid requiring GFP_ATOMIC.
>
> Requires:
>   net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED
>   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
>   TCPCT part 1d: define TCP cookie option, extend existing struct's

BTW if you add new socket options you should always submit a manpage
patch to linux-man@vger.kernel.org too

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-13  6:21   ` Eric Dumazet
@ 2009-11-13 14:35     ` William Allen Simpson
  0 siblings, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13 14:35 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Kernel Network Developers

Eric Dumazet wrote:
> Small point :
> 
> +	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
> +		spin_lock_bh(&tcp_secret_locker);
> +		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
> +			/* refreshed by another */
> +			spin_unlock_bh(&tcp_secret_locker);
> +			memcpy(bakery,
> +			       &tcp_secret_generating->secrets[0],
> +			       sizeof(tcp_secret_generating->secrets));
> 
> Technically speaking, you should perform the memcpy() before spin_unlock_bh()
> 
> +	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
> +		spin_lock_bh(&tcp_secret_locker);
> +		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
> +			/* refreshed by another */
> +			memcpy(bakery,
> +			       &tcp_secret_generating->secrets[0],
> +			       sizeof(tcp_secret_generating->secrets));
> +			spin_unlock_bh(&tcp_secret_locker);
> 
Technically speaking is exactly what I need!  I'd thought from your
earlier description of memory barriers that it should be after the unlock.
Done.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-13  6:26   ` Joe Perches
@ 2009-11-13 14:51     ` William Allen Simpson
  2009-11-13 18:04       ` Joe Perches
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13 14:51 UTC (permalink / raw)
  To: Joe Perches
  Cc: Linux Kernel Network Developers, Eric Dumazet, Paul E. McKenney

[-- Attachment #1: Type: text/plain, Size: 411 bytes --]

Joe Perches wrote:
> Rather than using stack, could you use
> u32 *secrets = &tcp_secret_secondary->secrets[0];
> 
That was because I thought it needed to be copied after the unlock.
Since Eric says it can be inside the locked path, I can do away with
that entirely, and just use the result (bakery) pointer itself.  Saves a
memcpy, too.

Here's my revised attempt (untested).  Any other technical corrections?

[-- Attachment #2: TCPCT+1b6+.patch --]
[-- Type: text/plain, Size: 6856 bytes --]

diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
index c118b2a..ec78a4b 100644
--- a/include/linux/cryptohash.h
+++ b/include/linux/cryptohash.h
@@ -2,6 +2,7 @@
 #define __CRYPTOHASH_H
 
 #define SHA_DIGEST_WORDS 5
+#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
 #define SHA_WORKSPACE_WORDS 80
 
 void sha_init(__u32 *buf);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ec183fd..4a99a8e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1478,6 +1478,14 @@ struct tcp_request_sock_ops {
 #endif
 };
 
+/* Using SHA1 for now, define some constants.
+ */
+#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS)
+#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4)
+#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)
+
+extern int tcp_cookie_generator(u32 *bakery);
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e0cfa63..3ae01bf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -264,6 +264,7 @@
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/crypto.h>
+#include <linux/time.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -2842,6 +2843,136 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
+/**
+ * Each Responder maintains up to two secret values concurrently for
+ * efficient secret rollover.  Each secret value has 4 states:
+ *
+ * Generating.  (tcp_secret_generating != tcp_secret_primary)
+ *    Generates new Responder-Cookies, but not yet used for primary
+ *    verification.  This is a short-term state, typically lasting only
+ *    one round trip time (RTT).
+ *
+ * Primary.  (tcp_secret_generating == tcp_secret_primary)
+ *    Used both for generation and primary verification.
+ *
+ * Retiring.  (tcp_secret_retiring != tcp_secret_secondary)
+ *    Used for verification, until the first failure that can be
+ *    verified by the newer Generating secret.  At that time, this
+ *    cookie's state is changed to Secondary, and the Generating
+ *    cookie's state is changed to Primary.  This is a short-term state,
+ *    typically lasting only one round trip time (RTT).
+ *
+ * Secondary.  (tcp_secret_retiring == tcp_secret_secondary)
+ *    Used for secondary verification, after primary verification
+ *    failures.  This state lasts no more than twice the Maximum Segment
+ *    Lifetime (2MSL).  Then, the secret is discarded.
+ */
+struct tcp_cookie_secret {
+	/* The secret is divided into two parts.  The digest part is the
+	 * equivalent of previously hashing a secret and saving the state,
+	 * and serves as an initialization vector (IV).  The message part
+	 * serves as the trailing secret.
+	 */
+	u32				secrets[COOKIE_WORKSPACE_WORDS];
+	unsigned long			expires;
+};
+
+#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
+#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
+#define TCP_SECRET_LIFE (HZ * 600)
+
+static struct tcp_cookie_secret tcp_secret_one;
+static struct tcp_cookie_secret tcp_secret_two;
+
+/* Essentially a circular list, without dynamic allocation. */
+static struct tcp_cookie_secret *tcp_secret_generating;
+static struct tcp_cookie_secret *tcp_secret_primary;
+static struct tcp_cookie_secret *tcp_secret_retiring;
+static struct tcp_cookie_secret *tcp_secret_secondary;
+
+static DEFINE_SPINLOCK(tcp_secret_locker);
+
+/* Select a pseudo-random word in the cookie workspace.
+ */
+static inline u32 tcp_cookie_work(const u32 *ws, const int n)
+{
+	return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
+}
+
+/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
+ * Called in softirq context.
+ * Returns: 0 for success.
+ */
+int tcp_cookie_generator(u32 *bakery)
+{
+	unsigned long jiffy = jiffies;
+
+	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
+		spin_lock_bh(&tcp_secret_locker);
+		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
+			/* refreshed by another */
+			memcpy(bakery,
+			       &tcp_secret_generating->secrets[0],
+			       COOKIE_WORKSPACE_WORDS);
+			spin_unlock_bh(&tcp_secret_locker);
+		} else {
+			/* still needs refreshing */
+			get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
+
+			/* The first time, paranoia assumes that the
+			 * randomization function isn't as strong.  But,
+			 * this secret initialization is delayed until
+			 * the last possible moment (packet arrival).
+			 * Although that time is observable, it is
+			 * unpredictably variable.  Mash in the most
+			 * volatile clock bits available, and expire the
+			 * secret extra quickly.
+			 */
+			if (unlikely(tcp_secret_primary->expires ==
+				     tcp_secret_secondary->expires)) {
+				struct timespec tv;
+
+				getnstimeofday(&tv);
+				bakery[COOKIE_DIGEST_WORDS+0] ^=
+					(u32)tv.tv_nsec;
+
+				tcp_secret_secondary->expires = jiffy
+					+ TCP_SECRET_1MSL
+					+ (0x0f & tcp_cookie_work(bakery, 0));
+			} else {
+				tcp_secret_secondary->expires = jiffy
+					+ TCP_SECRET_LIFE
+					+ (0xff & tcp_cookie_work(bakery, 1));
+				tcp_secret_primary->expires = jiffy
+					+ TCP_SECRET_2MSL
+					+ (0x1f & tcp_cookie_work(bakery, 2));
+			}
+			memcpy(&tcp_secret_generating->secrets[0],
+			       bakery, COOKIE_WORKSPACE_WORDS);
+
+			rcu_assign_pointer(tcp_secret_generating,
+					   tcp_secret_secondary);
+			rcu_assign_pointer(tcp_secret_retiring,
+					   tcp_secret_primary);
+			spin_unlock_bh(&tcp_secret_locker);
+			/*
+			 * Neither call_rcu() or synchronize_rcu() are needed.
+			 * Retiring data is not freed.  It is replaced after
+			 * further (locked) pointer updates, and a quiet time
+			 * (minimum 1MSL, maximum LIFE - 2MSL).
+			 */
+		}
+	} else {
+		rcu_read_lock_bh();
+		memcpy(bakery,
+		       &rcu_dereference(tcp_secret_generating)->secrets[0],
+		       COOKIE_WORKSPACE_WORDS);
+		rcu_read_unlock_bh();
+	}
+	return 0;
+}
+EXPORT_SYMBOL(tcp_cookie_generator);
+
 void tcp_done(struct sock *sk)
 {
 	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
@@ -2876,6 +3012,7 @@ void __init tcp_init(void)
 	struct sk_buff *skb = NULL;
 	unsigned long nr_pages, limit;
 	int order, i, max_share;
+	unsigned long jiffy = jiffies;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -2969,6 +3106,15 @@ void __init tcp_init(void)
 	       tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
 
 	tcp_register_congestion_control(&tcp_reno);
+
+	memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
+	memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
+	tcp_secret_one.expires = jiffy; /* past due */
+	tcp_secret_two.expires = jiffy; /* past due */
+	tcp_secret_generating = &tcp_secret_one;
+	tcp_secret_primary = &tcp_secret_one;
+	tcp_secret_retiring = &tcp_secret_two;
+	tcp_secret_secondary = &tcp_secret_two;
 }
 
 EXPORT_SYMBOL(tcp_close);
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-13  6:32   ` Eric Dumazet
@ 2009-11-13 16:06     ` William Allen Simpson
  2009-11-16 20:50       ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13 16:06 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Kernel Network Developers, Joe Perches

[-- Attachment #1: Type: text/plain, Size: 1190 bytes --]

Eric Dumazet wrote:
> +/**
> + * A tcp_sock contains a pointer to the current value, and this is cloned to
> + * the tcp_timewait_sock.
> + *
> + * @cookie_pair:	variable data from the option exchange.
> + *
> + * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
> + *			indicates default (sysctl_tcp_cookie_size).
> + *			After cookie sent, remembers size of cookie.
> + *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
> + *
> + * @s_data_desired:	user specified tcpct_s_data_desired.  When the
> + *			constant payload is specified (@s_data_constant),
> + *			holds its length instead.
> + *			Range 0 to TCP_MSS_DESIRED.
> + *
> + * @s_data_payload:	constant data that is to be included in the
> + *			payload of SYN or SYNACK segments when the
> + *			cookie option is present.
> + */
> 
> Thanks for this kerneldoc William ;)
> 
> But header should be :
> /**
>  *	struct tcp_cookie_values - Some description...
> 
Botheration.  Just unlucky in following another example (again).

A private message suggests including the initialization code here
(instead of part 1f) to avoid some page flipping.  Done.

Here's my revised attempt (untested).  Any technical corrections?





[-- Attachment #2: TCPCT+1d6+.patch --]
[-- Type: text/plain, Size: 10956 bytes --]

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index eaa3113..6c5ff66 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -247,31 +247,38 @@ struct tcp_options_received {
 		sack_ok : 4,	/* SACK seen on SYN packet		*/
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
-/*	SACKs data	*/
+	u8	cookie_plus:6,	/* bytes in authenticator/cookie option	*/
+		cookie_out_never:1,
+		cookie_in_always:1;
 	u8	num_sacks;	/* Number of SACK blocks		*/
-	u16	user_mss;  	/* mss requested by user in ioctl */
+	u16	user_mss;	/* mss requested by user in ioctl	*/
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
-	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
+	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->cookie_plus = 0;
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
- * to increse this, although since:
+ * to increase this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
  * only four options will fit in a standard TCP header */
 #define TCP_NUM_SACKS 4
 
+struct tcp_cookie_values;
+struct tcp_request_sock_ops;
+
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 #ifdef CONFIG_TCP_MD5SIG
 	/* Only used by TCP MD5 Signature so far. */
 	const struct tcp_request_sock_ops *af_specific;
 #endif
-	u32			 	rcv_isn;
-	u32			 	snt_isn;
+	u32				rcv_isn;
+	u32				snt_isn;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -441,6 +448,12 @@ struct tcp_sock {
 /* TCP MD5 Signature Option information */
 	struct tcp_md5sig_info	*md5sig_info;
 #endif
+
+	/* When the cookie options are generated and exchanged, then this
+	 * object holds a reference to them (cookie_values->kref).  Also
+	 * contains related tcp_cookie_transactions fields.
+	 */
+	struct tcp_cookie_values  *cookie_values;
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -459,6 +472,10 @@ struct tcp_timewait_sock {
 	u16			  tw_md5_keylen;
 	u8			  tw_md5_key[TCP_MD5SIG_MAXKEYLEN];
 #endif
+	/* Few sockets in timewait have cookies; in that case, then this
+	 * object holds a reference to them (tw_cookie_values->kref).
+	 */
+	struct tcp_cookie_values  *tw_cookie_values;
 };
 
 static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 738b65f..3a4c840 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
+#include <linux/kref.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -164,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */
 
 /*
  *     TCP option lengths
@@ -174,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERM      2
 #define TCPOLEN_TIMESTAMP      10
 #define TCPOLEN_MD5SIG         18
+#define TCPOLEN_COOKIE_BASE    2	/* Cookie-less header extension */
+#define TCPOLEN_COOKIE_PAIR    3	/* Cookie pair header extension */
+#define TCPOLEN_COOKIE_MIN     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
+#define TCPOLEN_COOKIE_MAX     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -1482,6 +1488,83 @@ struct tcp_request_sock_ops {
 
 extern int tcp_cookie_generator(u32 *bakery);
 
+/**
+ *	struct tcp_cookie_values - each socket needs extra space for the
+ *	cookies, together with (optional) space for any SYN data.
+ *
+ *	A tcp_sock contains a pointer to the current value, and this is
+ *	cloned to the tcp_timewait_sock.
+ *
+ * @cookie_pair:	variable data from the option exchange.
+ *
+ * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
+ *			indicates default (sysctl_tcp_cookie_size).
+ *			After cookie sent, remembers size of cookie.
+ *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
+ *
+ * @s_data_desired:	user specified tcpct_s_data_desired.  When the
+ *			constant payload is specified (@s_data_constant),
+ *			holds its length instead.
+ *			Range 0 to TCP_MSS_DESIRED.
+ *
+ * @s_data_payload:	constant data that is to be included in the
+ *			payload of SYN or SYNACK segments when the
+ *			cookie option is present.
+ */
+struct tcp_cookie_values {
+	struct kref	kref;
+	u8		cookie_pair[TCP_COOKIE_PAIR_SIZE];
+	u8		cookie_pair_size;
+	u8		cookie_desired;
+	u16		s_data_desired:11,
+			s_data_constant:1,
+			s_data_in:1,
+			s_data_out:1,
+			s_data_unused:2;
+	u8		s_data_payload[0];
+};
+
+static inline void tcp_cookie_values_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct tcp_cookie_values, kref));
+}
+
+/* The length of constant payload data.  Note that s_data_desired is
+ * overloaded, depending on s_data_constant: either the length of constant
+ * data (returned here) or the limit on variable data.
+ */
+static inline int tcp_s_data_size(const struct tcp_sock *tp)
+{
+	return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
+		? tp->cookie_values->s_data_desired
+		: 0;
+}
+
+/**
+ *	struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
+ *
+ *	As tcp_request_sock has already been extended in other places, the
+ *	only remaining method is to pass stack values along as function
+ *	parameters.  These parameters are not needed after sending SYNACK.
+ *
+ * @cookie_bakery:	cryptographic secret and message workspace.
+ *
+ * @cookie_plus:	bytes in authenticator/cookie option, copied from
+ *			struct tcp_options_received (above).
+ */
+struct tcp_extend_values {
+	struct request_values		rv;
+	u32				cookie_bakery[COOKIE_WORKSPACE_WORDS];
+	u8				cookie_plus:6,
+					cookie_out_never:1,
+					cookie_in_always:1;
+};
+
+static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
+{
+	return (struct tcp_extend_values *)rvp;
+}
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 094231b..2ae1985 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1834,6 +1834,19 @@ static int tcp_v4_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv4_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
@@ -1887,6 +1900,13 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
+	/* TCP Cookie Transactions */
+	if (tp->cookie_values != NULL) {
+		kref_put(&tp->cookie_values->kref,
+			 tcp_cookie_values_release);
+		tp->cookie_values = NULL;
+	}
+
 	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a42990..2b50da8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -389,14 +389,42 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		const struct inet_request_sock *ireq = inet_rsk(req);
 		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
-		struct tcp_sock *newtp;
+		struct tcp_sock *newtp = tcp_sk(newsk);
+		struct tcp_sock *oldtp = tcp_sk(sk);
+		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
+
+		/* TCP Cookie Transactions require space for the cookie pair,
+		 * as it differs for each connection.  There is no need to
+		 * copy any s_data_payload stored at the original socket.
+		 * Failure will prevent resuming the connection.
+		 *
+		 * Presumed copied, in order of appearance:
+		 *	cookie_in_always, cookie_out_never
+		 */
+		if (oldcvp != NULL) {
+			struct tcp_cookie_values *newcvp =
+				kzalloc(sizeof(*newtp->cookie_values),
+					GFP_ATOMIC);
+
+			if (newcvp != NULL) {
+				kref_init(&newcvp->kref);
+				newcvp->cookie_desired =
+						oldcvp->cookie_desired;
+				newtp->cookie_values = newcvp;
+			} else {
+				/* Not Yet Implemented */
+				newtp->cookie_values = NULL;
+			}
+		}
 
 		/* Now setup tcp_sock */
-		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
-		newtp->snd_up = treq->snt_isn + 1;
+
+		newtp->rcv_wup = newtp->copied_seq =
+		newtp->rcv_nxt = treq->rcv_isn + 1;
+
+		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt =
+		newtp->snd_up = treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		tcp_prequeue_init(newtp);
 
@@ -429,8 +457,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->write_seq = treq->snt_isn + 1;
-		newtp->pushed_seq = newtp->write_seq;
+		newtp->write_seq = newtp->pushed_seq =
+			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -596,7 +624,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 * Invalid ACK: reset will be sent by listening socket
 	 */
 	if ((flg & TCP_FLAG_ACK) &&
-	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
+	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1 +
+					 tcp_s_data_size(tcp_sk(sk))))
 		return sk;
 
 	/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3e327bc..2b16f9a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1865,6 +1865,19 @@ static int tcp_v6_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv6_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS
  2009-11-13 14:11   ` Andi Kleen
@ 2009-11-13 16:32     ` William Allen Simpson
  0 siblings, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13 16:32 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Linux Kernel Network Developers

Andi Kleen wrote:
> BTW if you add new socket options you should always submit a manpage
> patch to linux-man@vger.kernel.org too
> 
Thanks muchly, I'd wondered where one does that!  I'll do that as soon
as the code goes into the net tree (rather than net-next) for staging
out to the main line, as otherwise that could be pretty confusing for
others (non-existent options).

I'd found Documentation/networking/ip-sysctl.txt, updated in part 1c.

Any technical corrections to this patch (or part 1c)?

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 6/7 RFC] TCPCT part 1f: Initiator Cookie => Responder
  2009-11-13  5:40 ` [net-next-2.6 PATCH v6 6/7 RFC] TCPCT part 1f: Initiator Cookie => Responder William Allen Simpson
@ 2009-11-13 16:51   ` William Allen Simpson
  2009-11-16 21:35     ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13 16:51 UTC (permalink / raw)
  To: Linux Kernel Network Developers; +Cc: Eric Dumazet, Joe Perches

[-- Attachment #1: Type: text/plain, Size: 148 bytes --]

Moving the initialization (and destruction) to part 1d makes this about as
short and easy to analyze as possible.  Any more technical observations?

[-- Attachment #2: TCPCT+1f6+.patch --]
[-- Type: text/plain, Size: 9516 bytes --]

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e59fa5a..e1553d3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -370,15 +370,45 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_TS		(1 << 1)
 #define OPTION_MD5		(1 << 2)
 #define OPTION_WSCALE		(1 << 3)
+#define OPTION_COOKIE_EXTENSION	(1 << 4)
 
 struct tcp_out_options {
 	u8 options;		/* bit field of OPTION_* */
 	u8 ws;			/* window scale, 0 to disable */
 	u8 num_sack_blocks;	/* number of SACK blocks to include */
+	u8 hash_size;		/* bytes in hash_location */
 	u16 mss;		/* 0 to disable */
 	__u32 tsval, tsecr;	/* need to include OPTION_TS */
+	__u8 *hash_location;	/* temporary pointer, overloaded */
 };
 
+/* The sysctl int routines are generic, so check consistency here.
+ */
+static u8 tcp_cookie_size_check(u8 desired)
+{
+	if (desired > 0) {
+		/* previously specified */
+		return desired;
+	}
+	if (sysctl_tcp_cookie_size <= 0) {
+		/* no default specified */
+		return 0;
+	}
+	if (sysctl_tcp_cookie_size < TCP_COOKIE_MIN) {
+		/* value too small, increase to minimum */
+		return TCP_COOKIE_MIN;
+	}
+	if (sysctl_tcp_cookie_size > TCP_COOKIE_MAX) {
+		/* value too large, decrease to maximum */
+		return TCP_COOKIE_MAX;
+	}
+	if (0x1 & sysctl_tcp_cookie_size) {
+		/* 8-bit multiple, illegal, fix it */
+		return (u8)(sysctl_tcp_cookie_size + 0x1);
+	}
+	return (u8)sysctl_tcp_cookie_size;
+}
+
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -393,17 +423,34 @@ struct tcp_out_options {
  * (but it may well be that other scenarios fail similarly).
  */
 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
-			      const struct tcp_out_options *opts,
-			      __u8 **md5_hash) {
-	if (unlikely(OPTION_MD5 & opts->options)) {
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_MD5SIG << 8) |
-			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *)ptr;
+			      struct tcp_out_options *opts)
+{
+	u8 options = opts->options;	/* mungable copy */
+
+	/* Having both authentication and cookies for security is redundant,
+	 * and there's certainly not enough room.  Instead, the cookie-less
+	 * extension variant is proposed.
+	 *
+	 * Consider the pessimal case with authentication.  The options
+	 * could look like:
+	 *   COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
+	 */
+	if (unlikely(OPTION_MD5 & options)) {
+		if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
+			*ptr++ = htonl((TCPOPT_COOKIE << 24) |
+				       (TCPOLEN_COOKIE_BASE << 16) |
+				       (TCPOPT_MD5SIG << 8) |
+				       TCPOLEN_MD5SIG);
+		} else {
+			*ptr++ = htonl((TCPOPT_NOP << 24) |
+				       (TCPOPT_NOP << 16) |
+				       (TCPOPT_MD5SIG << 8) |
+				       TCPOLEN_MD5SIG);
+		}
+		options &= ~OPTION_COOKIE_EXTENSION;
+		/* overload cookie hash location */
+		opts->hash_location = (__u8 *)ptr;
 		ptr += 4;
-	} else {
-		*md5_hash = NULL;
 	}
 
 	if (unlikely(opts->mss)) {
@@ -412,12 +459,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			       opts->mss);
 	}
 
-	if (likely(OPTION_TS & opts->options)) {
-		if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
+	if (likely(OPTION_TS & options)) {
+		if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
 				       (TCPOLEN_SACK_PERM << 16) |
 				       (TCPOPT_TIMESTAMP << 8) |
 				       TCPOLEN_TIMESTAMP);
+			options &= ~OPTION_SACK_ADVERTISE;
 		} else {
 			*ptr++ = htonl((TCPOPT_NOP << 24) |
 				       (TCPOPT_NOP << 16) |
@@ -428,15 +476,52 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		*ptr++ = htonl(opts->tsecr);
 	}
 
-	if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
-		     !(OPTION_TS & opts->options))) {
+	/* Specification requires after timestamp, so do it now.
+	 *
+	 * Consider the pessimal case without authentication.  The options
+	 * could look like:
+	 *   MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
+	 */
+	if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
+		__u8 *cookie_copy = opts->hash_location;
+		u8 cookie_size = opts->hash_size;
+
+		if (unlikely(0x1 & cookie_size)) {
+			/* 8-bit multiple, illegal, ignore */
+			cookie_size = 0;
+		} else if (likely(0x2 & cookie_size)) {
+			__u8 *p = (__u8 *)ptr;
+
+			/* 16-bit multiple */
+			*p++ = TCPOPT_COOKIE;
+			*p++ = TCPOLEN_COOKIE_BASE + cookie_size;
+			*p++ = *cookie_copy++;
+			*p++ = *cookie_copy++;
+			ptr++;
+			cookie_size -= 2;
+		} else {
+			/* 32-bit multiple */
+			*ptr++ = htonl(((TCPOPT_NOP << 24) |
+					(TCPOPT_NOP << 16) |
+					(TCPOPT_COOKIE << 8) |
+					TCPOLEN_COOKIE_BASE) +
+				       cookie_size);
+		}
+
+		if (cookie_size > 0) {
+			memcpy(ptr, cookie_copy, cookie_size);
+			ptr += (cookie_size >> 2);
+		}
+	}
+
+	if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_SACK_PERM << 8) |
 			       TCPOLEN_SACK_PERM);
 	}
 
-	if (unlikely(OPTION_WSCALE & opts->options)) {
+	if (unlikely(OPTION_WSCALE & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_WINDOW << 16) |
 			       (TCPOLEN_WINDOW << 8) |
@@ -471,8 +556,12 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_out_options *opts,
 				struct tcp_md5sig_key **md5) {
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned size = 0;
+	struct tcp_cookie_values *cvp = tp->cookie_values;
 	struct dst_entry *dst = __sk_dst_get(sk);
+	unsigned size = 0;
+	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL)
+			 ? tcp_cookie_size_check(cvp->cookie_desired)
+			 : 0;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -517,6 +606,53 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 			size += TCPOLEN_SACKPERM_ALIGNED;
 	}
 
+	/* Note that timestamps are required by the specification.
+	 *
+	 * Odd numbers of bytes are prohibited by the specification, ensuring
+	 * that the cookie is 16-bit aligned, and the resulting cookie pair is
+	 * 32-bit aligned.
+	 */
+	if (*md5 == NULL
+	 && (OPTION_TS & opts->options)
+	 && cookie_size > 0) {
+		int need = TCPOLEN_COOKIE_BASE + cookie_size;
+		int remaining = MAX_TCP_OPTION_SPACE - size;
+
+		if (0x2 & need) {
+			/* 32-bit multiple */
+			need += 2; /* NOPs */
+
+			if (need > remaining) {
+				/* try shrinking cookie to fit */
+				cookie_size -= 2;
+				need -= 4;
+			}
+		}
+		while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
+			cookie_size -= 4;
+			need -= 4;
+		}
+		if (TCP_COOKIE_MIN <= cookie_size) {
+			opts->options |= OPTION_COOKIE_EXTENSION;
+			opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
+			opts->hash_size = cookie_size;
+
+			/* Remember for future incarnations. */
+			cvp->cookie_desired = cookie_size;
+
+			if (cvp->cookie_desired != cvp->cookie_pair_size) {
+				/* Currently use random bytes as a nonce,
+				 * assuming these are completely unpredictable
+				 * by hostile users of the same system.
+				 */
+				get_random_bytes(&cvp->cookie_pair[0],
+						 cookie_size);
+				cvp->cookie_pair_size = cookie_size;
+			}
+
+			size += need;
+		}
+	}
 	return size;
 }
 
@@ -632,7 +768,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	struct tcp_out_options opts;
 	unsigned tcp_options_size, tcp_header_size;
 	struct tcp_md5sig_key *md5;
-	__u8 *md5_hash_location;
 	struct tcphdr *th;
 	int err;
 
@@ -703,7 +838,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		}
 	}
 
-	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
 		TCP_ECN_send(sk, skb, tcp_header_size);
 
@@ -711,7 +846,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-		tp->af_specific->calc_md5_hash(md5_hash_location,
+		tp->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, sk, NULL, skb);
 	}
 #endif
@@ -2235,14 +2370,13 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct request_values *rvp)
 {
+	struct tcp_out_options opts;
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
-	int tcp_header_size;
-	struct tcp_out_options opts;
 	struct sk_buff *skb;
 	struct tcp_md5sig_key *md5;
-	__u8 *md5_hash_location;
+	int tcp_header_size;
 	int mss;
 
 	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
@@ -2303,14 +2437,14 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(min(req->rcv_wnd, 65535U));
-	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	th->doff = (tcp_header_size >> 2);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */
 	if (md5) {
-		tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location,
+		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, NULL, req, skb);
 	}
 #endif
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-13 14:51     ` William Allen Simpson
@ 2009-11-13 18:04       ` Joe Perches
  2009-11-16 14:39         ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: Joe Perches @ 2009-11-13 18:04 UTC (permalink / raw)
  To: William Allen Simpson
  Cc: Linux Kernel Network Developers, Eric Dumazet, Paul E. McKenney

On Fri, 2009-11-13 at 09:51 -0500, William Allen Simpson wrote:
> Since Eric says it can be inside the locked path, I can do away with
> that entirely, and just use the result (bakery) pointer itself.  Saves a
> memcpy, too.
> Here's my revised attempt (untested).  Any other technical corrections?

Just one, thanks for persisting.

> +	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
> +		spin_lock_bh(&tcp_secret_locker);
> +		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
[]
> +			spin_unlock_bh(&tcp_secret_locker);
> +		} else {
[]
> +			spin_unlock_bh(&tcp_secret_locker);
[removed comments]
> +		}

perhaps deleting the two unlocks above and moving it
to the same indent level here is trivially better.

		spin_unlock_bh(&tcp_secret_locker);



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
  2009-11-13  4:31 ` [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
@ 2009-11-13 18:37   ` Joe Perches
  2009-11-13 19:45     ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: Joe Perches @ 2009-11-13 18:37 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: Linux Kernel Network Developers

On Thu, 2009-11-12 at 23:31 -0500, William Allen Simpson wrote:
> Define sysctl (tcp_cookie_size) to turn on and off the cookie option
> default globally, instead of a compiled configuration option.
[]

> +#define TCP_COOKIE_MIN		 8		/*  64-bits */
> +#define TCP_COOKIE_MAX		16		/* 128-bits */

perhaps something like:

static const int TCP_COOKIE_MIN = 8;
static const int TCP_COOKIE_MAX = 16;

[]

> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -714,6 +714,14 @@ static struct ctl_table ipv4_table[] = {
>  	},
>  	{
>  		.ctl_name	= CTL_UNNUMBERED,
> +		.procname	= "tcp_cookie_size",
> +		.data		= &sysctl_tcp_cookie_size,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec

with proc_dointvec_minmax

		.extra1		= &TCP_COOKIE_MIN,
		.extra2		= &TCP_COOKIE_MAX,

or even adding proc_dointvec_minmax_even
might save some cycles during cookie handling.



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
  2009-11-13 18:37   ` Joe Perches
@ 2009-11-13 19:45     ` William Allen Simpson
  2009-11-14 15:43       ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-13 19:45 UTC (permalink / raw)
  To: Joe Perches; +Cc: Linux Kernel Network Developers

Joe Perches wrote:
> On Thu, 2009-11-12 at 23:31 -0500, William Allen Simpson wrote:
>> Define sysctl (tcp_cookie_size) to turn on and off the cookie option
>> default globally, instead of a compiled configuration option.
> []
> 
>> +#define TCP_COOKIE_MIN		 8		/*  64-bits */
>> +#define TCP_COOKIE_MAX		16		/* 128-bits */
> 
> perhaps something like:
> 
> static const int TCP_COOKIE_MIN = 8;
> static const int TCP_COOKIE_MAX = 16;
> 
> []
> 
>> --- a/net/ipv4/sysctl_net_ipv4.c
>> +++ b/net/ipv4/sysctl_net_ipv4.c
>> @@ -714,6 +714,14 @@ static struct ctl_table ipv4_table[] = {
>>  	},
>>  	{
>>  		.ctl_name	= CTL_UNNUMBERED,
>> +		.procname	= "tcp_cookie_size",
>> +		.data		= &sysctl_tcp_cookie_size,
>> +		.maxlen		= sizeof(int),
>> +		.mode		= 0644,
>> +		.proc_handler	= proc_dointvec
> 
> with proc_dointvec_minmax
> 
> 		.extra1		= &TCP_COOKIE_MIN,
> 		.extra2		= &TCP_COOKIE_MAX,
> 
> or even adding proc_dointvec_minmax_even
> might save some cycles during cookie handling.
> 
Well, that would have to be proc_dointvec_minmax_even_zero(), as the
valid values can be 0, 8, 10, 12, 14, or 16.  But my guess (based on
experience owning an ISP and teaching student operators) is that folks
will remember to turn it off (0) or on (1) or "bigger" (255), and not
really care about the finer gradations.

Error messages have a tendency to scroll off the screen and be forgotten.

And it seems to me a case of "premature optimization" -- it might save
1 or 2 tests in the order I've listed them in tcp_cookie_size_check(),
those tests only happen on the SYN the first time the _client_ calls,
and the result is saved for the future.  See part 1f:

+	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL)
+			 ? tcp_cookie_size_check(cvp->cookie_desired)
+			 : 0;

[]

+			/* Remember for future incarnations. */
+			cvp->cookie_desired = cookie_size;

On the server side, we have to test during parsing anyway.  I never trust
any data that comes over the network....

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
  2009-11-13 19:45     ` William Allen Simpson
@ 2009-11-14 15:43       ` William Allen Simpson
  2009-11-16 20:40         ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-14 15:43 UTC (permalink / raw)
  To: Joe Perches; +Cc: Linux Kernel Network Developers

William Allen Simpson wrote:
> Joe Perches wrote:
>> or even adding proc_dointvec_minmax_even
>> might save some cycles during cookie handling.
>>
> Well, that would have to be proc_dointvec_minmax_even_zero(), as the
> valid values can be 0, 8, 10, 12, 14, or 16.  ...
> 
> And it seems to me a case of "premature optimization" -- it might save
> 1 or 2 tests in the order I've listed them in tcp_cookie_size_check(),
> ...
> 
> On the server side, we have to test during parsing anyway.  I never trust
> any data that comes over the network....
> 
I looked at this again today, and proc_dointvec_minmax_even_zero() still
seems to be overkill.  I couldn't find anybody else that does such things
at sysctl parsing time.

I did find we could eliminate a test for odd (in part 1f),

+		if (unlikely(0x1 & cookie_size)) {
+			/* 8-bit multiple, illegal, ignore */
+			cookie_size = 0;

by using a better test for the network data (in part 1g), and it
should be faster, too (assuming gcc is smart enough):

Was:
+				if (TCPOLEN_COOKIE_MAX >= opsize
+				 && TCPOLEN_COOKIE_MIN <= opsize) {

Now:
+				switch (opsize) {
+				case TCPOLEN_COOKIE_MIN+0:
+				case TCPOLEN_COOKIE_MIN+2:
+				case TCPOLEN_COOKIE_MIN+4:
+				case TCPOLEN_COOKIE_MIN+6:
+				case TCPOLEN_COOKIE_MAX:

(This division into so many parts for review is driving me crazy....)

Saved 2 if's for every cookie!  Of course, there's a cpu intensive
SHA1 for each cookie, so this pales in comparison.

Premature optimization or not, thanks for the ideas!


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-13 18:04       ` Joe Perches
@ 2009-11-16 14:39         ` William Allen Simpson
  2009-11-16 15:34           ` Eric Dumazet
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-16 14:39 UTC (permalink / raw)
  To: Joe Perches
  Cc: Linux Kernel Network Developers, Eric Dumazet, Paul E. McKenney

[-- Attachment #1: Type: text/plain, Size: 182 bytes --]

Joe Perches wrote:
> perhaps deleting the two unlocks above and moving it
> to the same indent level here is trivially better.
> 
Done.  I left the block comment.

Do I hear an Ack?

[-- Attachment #2: TCPCT+1b6++.patch --]
[-- Type: text/plain, Size: 6798 bytes --]

diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
index c118b2a..ec78a4b 100644
--- a/include/linux/cryptohash.h
+++ b/include/linux/cryptohash.h
@@ -2,6 +2,7 @@
 #define __CRYPTOHASH_H
 
 #define SHA_DIGEST_WORDS 5
+#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
 #define SHA_WORKSPACE_WORDS 80
 
 void sha_init(__u32 *buf);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ec183fd..4a99a8e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1478,6 +1478,14 @@ struct tcp_request_sock_ops {
 #endif
 };
 
+/* Using SHA1 for now, define some constants.
+ */
+#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS)
+#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4)
+#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)
+
+extern int tcp_cookie_generator(u32 *bakery);
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e0cfa63..86990e1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -264,6 +264,7 @@
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/crypto.h>
+#include <linux/time.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -2842,6 +2843,135 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
+/**
+ * Each Responder maintains up to two secret values concurrently for
+ * efficient secret rollover.  Each secret value has 4 states:
+ *
+ * Generating.  (tcp_secret_generating != tcp_secret_primary)
+ *    Generates new Responder-Cookies, but not yet used for primary
+ *    verification.  This is a short-term state, typically lasting only
+ *    one round trip time (RTT).
+ *
+ * Primary.  (tcp_secret_generating == tcp_secret_primary)
+ *    Used both for generation and primary verification.
+ *
+ * Retiring.  (tcp_secret_retiring != tcp_secret_secondary)
+ *    Used for verification, until the first failure that can be
+ *    verified by the newer Generating secret.  At that time, this
+ *    cookie's state is changed to Secondary, and the Generating
+ *    cookie's state is changed to Primary.  This is a short-term state,
+ *    typically lasting only one round trip time (RTT).
+ *
+ * Secondary.  (tcp_secret_retiring == tcp_secret_secondary)
+ *    Used for secondary verification, after primary verification
+ *    failures.  This state lasts no more than twice the Maximum Segment
+ *    Lifetime (2MSL).  Then, the secret is discarded.
+ */
+struct tcp_cookie_secret {
+	/* The secret is divided into two parts.  The digest part is the
+	 * equivalent of previously hashing a secret and saving the state,
+	 * and serves as an initialization vector (IV).  The message part
+	 * serves as the trailing secret.
+	 */
+	u32				secrets[COOKIE_WORKSPACE_WORDS];
+	unsigned long			expires;
+};
+
+#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
+#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
+#define TCP_SECRET_LIFE (HZ * 600)
+
+static struct tcp_cookie_secret tcp_secret_one;
+static struct tcp_cookie_secret tcp_secret_two;
+
+/* Essentially a circular list, without dynamic allocation. */
+static struct tcp_cookie_secret *tcp_secret_generating;
+static struct tcp_cookie_secret *tcp_secret_primary;
+static struct tcp_cookie_secret *tcp_secret_retiring;
+static struct tcp_cookie_secret *tcp_secret_secondary;
+
+static DEFINE_SPINLOCK(tcp_secret_locker);
+
+/* Select a pseudo-random word in the cookie workspace.
+ */
+static inline u32 tcp_cookie_work(const u32 *ws, const int n)
+{
+	return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
+}
+
+/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
+ * Called in softirq context.
+ * Returns: 0 for success.
+ */
+int tcp_cookie_generator(u32 *bakery)
+{
+	unsigned long jiffy = jiffies;
+
+	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
+		spin_lock_bh(&tcp_secret_locker);
+		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
+			/* refreshed by another */
+			memcpy(bakery,
+			       &tcp_secret_generating->secrets[0],
+			       COOKIE_WORKSPACE_WORDS);
+		} else {
+			/* still needs refreshing */
+			get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
+
+			/* The first time, paranoia assumes that the
+			 * randomization function isn't as strong.  But,
+			 * this secret initialization is delayed until
+			 * the last possible moment (packet arrival).
+			 * Although that time is observable, it is
+			 * unpredictably variable.  Mash in the most
+			 * volatile clock bits available, and expire the
+			 * secret extra quickly.
+			 */
+			if (unlikely(tcp_secret_primary->expires ==
+				     tcp_secret_secondary->expires)) {
+				struct timespec tv;
+
+				getnstimeofday(&tv);
+				bakery[COOKIE_DIGEST_WORDS+0] ^=
+					(u32)tv.tv_nsec;
+
+				tcp_secret_secondary->expires = jiffy
+					+ TCP_SECRET_1MSL
+					+ (0x0f & tcp_cookie_work(bakery, 0));
+			} else {
+				tcp_secret_secondary->expires = jiffy
+					+ TCP_SECRET_LIFE
+					+ (0xff & tcp_cookie_work(bakery, 1));
+				tcp_secret_primary->expires = jiffy
+					+ TCP_SECRET_2MSL
+					+ (0x1f & tcp_cookie_work(bakery, 2));
+			}
+			memcpy(&tcp_secret_secondary->secrets[0],
+			       bakery, COOKIE_WORKSPACE_WORDS);
+
+			rcu_assign_pointer(tcp_secret_generating,
+					   tcp_secret_secondary);
+			rcu_assign_pointer(tcp_secret_retiring,
+					   tcp_secret_primary);
+			/*
+			 * Neither call_rcu() nor synchronize_rcu() needed.
+			 * Retiring data is not freed.  It is replaced after
+			 * further (locked) pointer updates, and a quiet time
+			 * (minimum 1MSL, maximum LIFE - 2MSL).
+			 */
+		}
+		spin_unlock_bh(&tcp_secret_locker);
+	} else {
+		rcu_read_lock_bh();
+		memcpy(bakery,
+		       &rcu_dereference(tcp_secret_generating)->secrets[0],
+		       COOKIE_WORKSPACE_WORDS);
+		rcu_read_unlock_bh();
+	}
+	return 0;
+}
+EXPORT_SYMBOL(tcp_cookie_generator);
+
 void tcp_done(struct sock *sk)
 {
 	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
@@ -2876,6 +3006,7 @@ void __init tcp_init(void)
 	struct sk_buff *skb = NULL;
 	unsigned long nr_pages, limit;
 	int order, i, max_share;
+	unsigned long jiffy = jiffies;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -2969,6 +3100,15 @@ void __init tcp_init(void)
 	       tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
 
 	tcp_register_congestion_control(&tcp_reno);
+
+	memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
+	memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
+	tcp_secret_one.expires = jiffy; /* past due */
+	tcp_secret_two.expires = jiffy; /* past due */
+	tcp_secret_generating = &tcp_secret_one;
+	tcp_secret_primary = &tcp_secret_one;
+	tcp_secret_retiring = &tcp_secret_two;
+	tcp_secret_secondary = &tcp_secret_two;
 }
 
 EXPORT_SYMBOL(tcp_close);

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-16 14:39         ` William Allen Simpson
@ 2009-11-16 15:34           ` Eric Dumazet
  2009-11-16 20:06             ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: Eric Dumazet @ 2009-11-16 15:34 UTC (permalink / raw)
  To: William Allen Simpson
  Cc: Joe Perches, Linux Kernel Network Developers, Paul E. McKenney

William Allen Simpson a écrit :
> Joe Perches wrote:
>> perhaps deleting the two unlocks above and moving it
>> to the same indent level here is trivially better.
>>
> Done.  I left the block comment.
> 
> Do I hear an Ack?
> 

Fine by me William

But you forgot to re-include full commit message and Signed-off-by:

Yes it seems cumbersome, but David is handling ~50 patches per day, 
we shall give him as _perfect_ patches as possible.



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie
  2009-11-16 15:34           ` Eric Dumazet
@ 2009-11-16 20:06             ` William Allen Simpson
  0 siblings, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-16 20:06 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Joe Perches, Linux Kernel Network Developers, Paul E. McKenney

Eric Dumazet wrote:
> William Allen Simpson a écrit :
>> Do I hear an Ack?
>>
> Fine by me William
> 
> But you forgot to re-include full commit message and Signed-off-by:
> 
> Yes it seems cumbersome, but David is handling ~50 patches per day, 
> we shall give him as _perfect_ patches as possible.
> 
Agreed, I understood that I'd need to send out the _entire_ patch series
without the RFC designation before our leader would consider it.

Right now, I'm just collecting as many comments, improvements, and Acks as
possible before fetch, rebase, compiling, testing, and re-posting the
whole megillah again with "no fuzz" patches.

Hopefully, splitting this into more sub-parts (as you requested) enables
reviewing, but of course it's an exponential increase in preparation and
test time.  So, I'm trying to get some comments on every sub-part.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
  2009-11-14 15:43       ` William Allen Simpson
@ 2009-11-16 20:40         ` William Allen Simpson
  0 siblings, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-16 20:40 UTC (permalink / raw)
  To: Joe Perches; +Cc: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 102 bytes --]

Improved Documentation to match existing code:

   Odd values are interpreted as the next even value.

[-- Attachment #2: TCPCT+1c6+.patch --]
[-- Type: text/plain, Size: 5128 bytes --]

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a0e134d..820dd4b 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -164,6 +164,14 @@ tcp_congestion_control - STRING
 	additional choices may be available based on kernel configuration.
 	Default is set as part of kernel configuration.
 
+tcp_cookie_size - INTEGER
+	Default size of TCP Cookie Transactions (TCPCT) option, that may be
+	overridden on a per socket basis by the TCPCT socket option.
+	Values greater than the maximum (16) are interpreted as the maximum.
+	Values greater than zero and less than the minimum (8) are interpreted
+	as the minimum.  Odd values are interpreted as the next even value.
+	Default: 0 (off).
+
 tcp_dsack - BOOLEAN
 	Allows TCP to send "duplicate" SACKs.
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 32d7d77..eaa3113 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -102,7 +102,9 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
+#define TCP_COOKIE_TRANSACTIONS	15	/* TCP Cookie Transactions */
 
+/* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
 #define TCPI_OPT_WSCALE		4
@@ -174,6 +176,30 @@ struct tcp_md5sig {
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];		/* key (binary) */
 };
 
+/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */
+#define TCP_COOKIE_MIN		 8		/*  64-bits */
+#define TCP_COOKIE_MAX		16		/* 128-bits */
+#define TCP_COOKIE_PAIR_SIZE	(2*TCP_COOKIE_MAX)
+
+/* Flags for both getsockopt and setsockopt */
+#define TCP_COOKIE_IN_ALWAYS	(1 << 0)	/* Discard SYN without cookie */
+#define TCP_COOKIE_OUT_NEVER	(1 << 1)	/* Prohibit outgoing cookies,
+						 * supercedes everything. */
+
+/* Flags for getsockopt */
+#define TCP_S_DATA_IN		(1 << 2)	/* Was data received? */
+#define TCP_S_DATA_OUT		(1 << 3)	/* Was data sent? */
+
+/* TCP_COOKIE_TRANSACTIONS data */
+struct tcp_cookie_transactions {
+	__u16	tcpct_flags;			/* see above */
+	__u8	__tcpct_pad1;			/* zero */
+	__u8	tcpct_cookie_desired;		/* bytes */
+	__u16	tcpct_s_data_desired;		/* bytes of variable data */
+	__u16	tcpct_used;			/* bytes in value */
+	__u8	tcpct_value[TCP_MSS_DEFAULT];
+};
+
 #ifdef __KERNEL__
 
 #include <linux/skbuff.h>
@@ -227,6 +253,11 @@ struct tcp_options_received {
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
+static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
+{
+	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+}
+
 /* This is the max number of SACKS that we'll generate and process. It's safe
  * to increse this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
@@ -435,6 +466,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
 	return (struct tcp_timewait_sock *)sk;
 }
 
-#endif
+#endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_TCP_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4a99a8e..738b65f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -234,6 +234,7 @@ extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_cookie_size;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -340,11 +341,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
 
 extern void tcp_enter_quickack_mode(struct sock *sk);
 
-static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
-{
- 	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
-}
-
 #define	TCP_ECN_OK		1
 #define	TCP_ECN_QUEUE_CWR	2
 #define	TCP_ECN_DEMAND_CWR	4
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2dcf04d..3422c54 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -714,6 +714,14 @@ static struct ctl_table ipv4_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_cookie_size",
+		.data		= &sysctl_tcp_cookie_size,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "udp_mem",
 		.data		= &sysctl_udp_mem,
 		.maxlen		= sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1151cb8..e59fa5a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,14 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+#ifdef CONFIG_SYSCTL
+/* By default, let the user enable it. */
+int sysctl_tcp_cookie_size __read_mostly = 0;
+#else
+int sysctl_tcp_cookie_size __read_mostly = TCP_COOKIE_MAX;
+#endif
+
+
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-13 16:06     ` William Allen Simpson
@ 2009-11-16 20:50       ` William Allen Simpson
  2009-11-16 21:08         ` Eric Dumazet
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-16 20:50 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Kernel Network Developers, Joe Perches

[-- Attachment #1: Type: text/plain, Size: 338 bytes --]

William Allen Simpson wrote:
> Here's my revised attempt (untested).  Any technical corrections?
> 
Limited multiple (=) assignments to only 2 per line for readability
(that in existing code have 4 per line and run well beyond 80 characters,
triggering a warning in scripts/checkpatch.pl).

Still no technical corrections.  Seeking Acks.

[-- Attachment #2: TCPCT+1d6++.patch --]
[-- Type: text/plain, Size: 10948 bytes --]

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index eaa3113..7fee8a4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -247,31 +247,38 @@ struct tcp_options_received {
 		sack_ok : 4,	/* SACK seen on SYN packet		*/
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
-/*	SACKs data	*/
+	u8	cookie_plus:6,	/* bytes in authenticator/cookie option	*/
+		cookie_out_never:1,
+		cookie_in_always:1;
 	u8	num_sacks;	/* Number of SACK blocks		*/
-	u16	user_mss;  	/* mss requested by user in ioctl */
+	u16	user_mss;	/* mss requested by user in ioctl	*/
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
-	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
+	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->cookie_plus = 0;
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
- * to increse this, although since:
+ * to increase this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
  * only four options will fit in a standard TCP header */
 #define TCP_NUM_SACKS 4
 
+struct tcp_cookie_values;
+struct tcp_request_sock_ops;
+
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 #ifdef CONFIG_TCP_MD5SIG
 	/* Only used by TCP MD5 Signature so far. */
 	const struct tcp_request_sock_ops *af_specific;
 #endif
-	u32			 	rcv_isn;
-	u32			 	snt_isn;
+	u32				rcv_isn;
+	u32				snt_isn;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -441,6 +448,12 @@ struct tcp_sock {
 /* TCP MD5 Signature Option information */
 	struct tcp_md5sig_info	*md5sig_info;
 #endif
+
+	/* When the cookie options are generated and exchanged, then this
+	 * object holds a reference to them (cookie_values->kref).  Also
+	 * contains related tcp_cookie_transactions fields.
+	 */
+	struct tcp_cookie_values  *cookie_values;
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -459,6 +472,10 @@ struct tcp_timewait_sock {
 	u16			  tw_md5_keylen;
 	u8			  tw_md5_key[TCP_MD5SIG_MAXKEYLEN];
 #endif
+	/* Few sockets in timewait have cookies; in that case, then this
+	 * object holds a reference to them (tw_cookie_values->kref).
+	 */
+	struct tcp_cookie_values  *tw_cookie_values;
 };
 
 static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 738b65f..f9abd9b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
+#include <linux/kref.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -164,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */
 
 /*
  *     TCP option lengths
@@ -174,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERM      2
 #define TCPOLEN_TIMESTAMP      10
 #define TCPOLEN_MD5SIG         18
+#define TCPOLEN_COOKIE_BASE    2	/* Cookie-less header extension */
+#define TCPOLEN_COOKIE_PAIR    3	/* Cookie pair header extension */
+#define TCPOLEN_COOKIE_MIN     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
+#define TCPOLEN_COOKIE_MAX     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -1482,6 +1488,83 @@ struct tcp_request_sock_ops {
 
 extern int tcp_cookie_generator(u32 *bakery);
 
+/**
+ *	struct tcp_cookie_values - each socket needs extra space for the
+ *	cookies, together with (optional) space for any SYN data.
+ *
+ *	A tcp_sock contains a pointer to the current value, and this is
+ *	cloned to the tcp_timewait_sock.
+ *
+ * @cookie_pair:	variable data from the option exchange.
+ *
+ * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
+ *			indicates default (sysctl_tcp_cookie_size).
+ *			After cookie sent, remembers size of cookie.
+ *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
+ *
+ * @s_data_desired:	user specified tcpct_s_data_desired.  When the
+ *			constant payload is specified (@s_data_constant),
+ *			holds its length instead.
+ *			Range 0 to TCP_MSS_DESIRED.
+ *
+ * @s_data_payload:	constant data that is to be included in the
+ *			payload of SYN or SYNACK segments when the
+ *			cookie option is present.
+ */
+struct tcp_cookie_values {
+	struct kref	kref;
+	u8		cookie_pair[TCP_COOKIE_PAIR_SIZE];
+	u8		cookie_pair_size;
+	u8		cookie_desired;
+	u16		s_data_desired:11,
+			s_data_constant:1,
+			s_data_in:1,
+			s_data_out:1,
+			s_data_unused:2;
+	u8		s_data_payload[0];
+};
+
+static inline void tcp_cookie_values_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct tcp_cookie_values, kref));
+}
+
+/* The length of constant payload data.  Note that s_data_desired is
+ * overloaded, depending on s_data_constant: either the length of constant
+ * data (returned here) or the limit on variable data.
+ */
+static inline int tcp_s_data_size(const struct tcp_sock *tp)
+{
+	return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
+		? tp->cookie_values->s_data_desired
+		: 0;
+}
+
+/**
+ *	struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
+ *
+ *	As tcp_request_sock has already been extended in other places, the
+ *	only remaining method is to pass stack values along as function
+ *	parameters.  These parameters are not needed after sending SYNACK.
+ *
+ * @cookie_bakery:	cryptographic secret and message workspace.
+ *
+ * @cookie_plus:	bytes in authenticator/cookie option, copied from
+ *			struct tcp_options_received (above).
+ */
+struct tcp_extend_values {
+	struct request_values		rv;
+	u32				cookie_bakery[COOKIE_WORKSPACE_WORDS];
+	u8				cookie_plus:6,
+					cookie_out_never:1,
+					cookie_in_always:1;
+};
+
+static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
+{
+	return (struct tcp_extend_values *)rvp;
+}
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 397ab8f..2bb7864 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1834,6 +1834,19 @@ static int tcp_v4_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv4_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
@@ -1887,6 +1900,13 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
+	/* TCP Cookie Transactions */
+	if (tp->cookie_values != NULL) {
+		kref_put(&tp->cookie_values->kref,
+			 tcp_cookie_values_release);
+		tp->cookie_values = NULL;
+	}
+
 	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a42990..53ef6d8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -389,14 +389,43 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		const struct inet_request_sock *ireq = inet_rsk(req);
 		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
-		struct tcp_sock *newtp;
+		struct tcp_sock *newtp = tcp_sk(newsk);
+		struct tcp_sock *oldtp = tcp_sk(sk);
+		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
+
+		/* TCP Cookie Transactions require space for the cookie pair,
+		 * as it differs for each connection.  There is no need to
+		 * copy any s_data_payload stored at the original socket.
+		 * Failure will prevent resuming the connection.
+		 *
+		 * Presumed copied, in order of appearance:
+		 *	cookie_in_always, cookie_out_never
+		 */
+		if (oldcvp != NULL) {
+			struct tcp_cookie_values *newcvp =
+				kzalloc(sizeof(*newtp->cookie_values),
+					GFP_ATOMIC);
+
+			if (newcvp != NULL) {
+				kref_init(&newcvp->kref);
+				newcvp->cookie_desired =
+						oldcvp->cookie_desired;
+				newtp->cookie_values = newcvp;
+			} else {
+				/* Not Yet Implemented */
+				newtp->cookie_values = NULL;
+			}
+		}
 
 		/* Now setup tcp_sock */
-		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
-		newtp->snd_up = treq->snt_isn + 1;
+
+		newtp->rcv_wup = newtp->copied_seq =
+		newtp->rcv_nxt = treq->rcv_isn + 1;
+
+		newtp->snd_sml = newtp->snd_una =
+		newtp->snd_nxt = newtp->snd_up =
+			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		tcp_prequeue_init(newtp);
 
@@ -429,8 +458,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->write_seq = treq->snt_isn + 1;
-		newtp->pushed_seq = newtp->write_seq;
+		newtp->write_seq = newtp->pushed_seq =
+			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -596,7 +625,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 * Invalid ACK: reset will be sent by listening socket
 	 */
 	if ((flg & TCP_FLAG_ACK) &&
-	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
+	    (TCP_SKB_CB(skb)->ack_seq !=
+	     tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
 		return sk;
 
 	/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3e327bc..973096a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1865,6 +1865,19 @@ static int tcp_v6_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv6_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-16 20:50       ` William Allen Simpson
@ 2009-11-16 21:08         ` Eric Dumazet
  2009-11-16 22:09           ` William Allen Simpson
  0 siblings, 1 reply; 42+ messages in thread
From: Eric Dumazet @ 2009-11-16 21:08 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: Linux Kernel Network Developers, Joe Perches

William Allen Simpson a écrit :
> William Allen Simpson wrote:
>> Here's my revised attempt (untested).  Any technical corrections?
>>
> Limited multiple (=) assignments to only 2 per line for readability
> (that in existing code have 4 per line and run well beyond 80 characters,
> triggering a warning in scripts/checkpatch.pl).
> 
> Still no technical corrections.  Seeking Acks.
> 

I am a bit uneasy on this patch, since apparently you have infrastructure
to send DATA payload on SYN, but I thought it was an optional part of your 'RFC'
and as such, being implemented later ?

I remember a previous remark from David that our skb queues would not contain
DATA on SYN packets...



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 6/7 RFC] TCPCT part 1f: Initiator Cookie => Responder
  2009-11-13 16:51   ` William Allen Simpson
@ 2009-11-16 21:35     ` William Allen Simpson
  0 siblings, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-16 21:35 UTC (permalink / raw)
  To: Linux Kernel Network Developers; +Cc: Eric Dumazet, Joe Perches

[-- Attachment #1: Type: text/plain, Size: 262 bytes --]

William Allen Simpson wrote:
> Moving the initialization (and destruction) to part 1d makes this about as
> short and easy to analyze as possible.  Any more technical observations?
> 
Removed 8-bit test, the result of related comments in part 1c.

Seeking Acks.

[-- Attachment #2: TCPCT+1f6++.patch --]
[-- Type: text/plain, Size: 9477 bytes --]

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e59fa5a..34b750e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -370,15 +370,45 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_TS		(1 << 1)
 #define OPTION_MD5		(1 << 2)
 #define OPTION_WSCALE		(1 << 3)
+#define OPTION_COOKIE_EXTENSION	(1 << 4)
 
 struct tcp_out_options {
 	u8 options;		/* bit field of OPTION_* */
 	u8 ws;			/* window scale, 0 to disable */
 	u8 num_sack_blocks;	/* number of SACK blocks to include */
+	u8 hash_size;		/* bytes in hash_location */
 	u16 mss;		/* 0 to disable */
 	__u32 tsval, tsecr;	/* need to include OPTION_TS */
+	__u8 *hash_location;	/* temporary pointer, overloaded */
 };
 
+/* The sysctl int routines are generic, so check consistency here.
+ */
+static u8 tcp_cookie_size_check(u8 desired)
+{
+	if (desired > 0) {
+		/* previously specified */
+		return desired;
+	}
+	if (sysctl_tcp_cookie_size <= 0) {
+		/* no default specified */
+		return 0;
+	}
+	if (sysctl_tcp_cookie_size < TCP_COOKIE_MIN) {
+		/* value too small, increase to minimum */
+		return TCP_COOKIE_MIN;
+	}
+	if (sysctl_tcp_cookie_size > TCP_COOKIE_MAX) {
+		/* value too large, decrease to maximum */
+		return TCP_COOKIE_MAX;
+	}
+	if (0x1 & sysctl_tcp_cookie_size) {
+		/* 8-bit multiple, illegal, fix it */
+		return (u8)(sysctl_tcp_cookie_size + 0x1);
+	}
+	return (u8)sysctl_tcp_cookie_size;
+}
+
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -393,17 +423,34 @@ struct tcp_out_options {
  * (but it may well be that other scenarios fail similarly).
  */
 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
-			      const struct tcp_out_options *opts,
-			      __u8 **md5_hash) {
-	if (unlikely(OPTION_MD5 & opts->options)) {
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_MD5SIG << 8) |
-			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *)ptr;
+			      struct tcp_out_options *opts)
+{
+	u8 options = opts->options;	/* mungable copy */
+
+	/* Having both authentication and cookies for security is redundant,
+	 * and there's certainly not enough room.  Instead, the cookie-less
+	 * extension variant is proposed.
+	 *
+	 * Consider the pessimal case with authentication.  The options
+	 * could look like:
+	 *   COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
+	 */
+	if (unlikely(OPTION_MD5 & options)) {
+		if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
+			*ptr++ = htonl((TCPOPT_COOKIE << 24) |
+				       (TCPOLEN_COOKIE_BASE << 16) |
+				       (TCPOPT_MD5SIG << 8) |
+				       TCPOLEN_MD5SIG);
+		} else {
+			*ptr++ = htonl((TCPOPT_NOP << 24) |
+				       (TCPOPT_NOP << 16) |
+				       (TCPOPT_MD5SIG << 8) |
+				       TCPOLEN_MD5SIG);
+		}
+		options &= ~OPTION_COOKIE_EXTENSION;
+		/* overload cookie hash location */
+		opts->hash_location = (__u8 *)ptr;
 		ptr += 4;
-	} else {
-		*md5_hash = NULL;
 	}
 
 	if (unlikely(opts->mss)) {
@@ -412,12 +459,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			       opts->mss);
 	}
 
-	if (likely(OPTION_TS & opts->options)) {
-		if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
+	if (likely(OPTION_TS & options)) {
+		if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
 				       (TCPOLEN_SACK_PERM << 16) |
 				       (TCPOPT_TIMESTAMP << 8) |
 				       TCPOLEN_TIMESTAMP);
+			options &= ~OPTION_SACK_ADVERTISE;
 		} else {
 			*ptr++ = htonl((TCPOPT_NOP << 24) |
 				       (TCPOPT_NOP << 16) |
@@ -428,15 +476,52 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		*ptr++ = htonl(opts->tsecr);
 	}
 
-	if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
-		     !(OPTION_TS & opts->options))) {
+	/* Specification requires after timestamp, so do it now.
+	 *
+	 * Consider the pessimal case without authentication.  The options
+	 * could look like:
+	 *   MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
+	 */
+	if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
+		__u8 *cookie_copy = opts->hash_location;
+		u8 cookie_size = opts->hash_size;
+
+		/* 8-bit multiple handled in tcp_cookie_size_check() above,
+		 * and elsewhere.
+		 */
+		if (0x2 & cookie_size) {
+			__u8 *p = (__u8 *)ptr;
+
+			/* 16-bit multiple */
+			*p++ = TCPOPT_COOKIE;
+			*p++ = TCPOLEN_COOKIE_BASE + cookie_size;
+			*p++ = *cookie_copy++;
+			*p++ = *cookie_copy++;
+			ptr++;
+			cookie_size -= 2;
+		} else {
+			/* 32-bit multiple */
+			*ptr++ = htonl(((TCPOPT_NOP << 24) |
+					(TCPOPT_NOP << 16) |
+					(TCPOPT_COOKIE << 8) |
+					TCPOLEN_COOKIE_BASE) +
+				       cookie_size);
+		}
+
+		if (cookie_size > 0) {
+			memcpy(ptr, cookie_copy, cookie_size);
+			ptr += (cookie_size / 4);
+		}
+	}
+
+	if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_SACK_PERM << 8) |
 			       TCPOLEN_SACK_PERM);
 	}
 
-	if (unlikely(OPTION_WSCALE & opts->options)) {
+	if (unlikely(OPTION_WSCALE & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_WINDOW << 16) |
 			       (TCPOLEN_WINDOW << 8) |
@@ -471,8 +556,12 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_out_options *opts,
 				struct tcp_md5sig_key **md5) {
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned size = 0;
+	struct tcp_cookie_values *cvp = tp->cookie_values;
 	struct dst_entry *dst = __sk_dst_get(sk);
+	unsigned size = 0;
+	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL)
+			 ? tcp_cookie_size_check(cvp->cookie_desired)
+			 : 0;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -517,6 +606,53 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 			size += TCPOLEN_SACKPERM_ALIGNED;
 	}
 
+	/* Note that timestamps are required by the specification.
+	 *
+	 * Odd numbers of bytes are prohibited by the specification, ensuring
+	 * that the cookie is 16-bit aligned, and the resulting cookie pair is
+	 * 32-bit aligned.
+	 */
+	if (*md5 == NULL
+	 && (OPTION_TS & opts->options)
+	 && cookie_size > 0) {
+		int need = TCPOLEN_COOKIE_BASE + cookie_size;
+		int remaining = MAX_TCP_OPTION_SPACE - size;
+
+		if (0x2 & need) {
+			/* 32-bit multiple */
+			need += 2; /* NOPs */
+
+			if (need > remaining) {
+				/* try shrinking cookie to fit */
+				cookie_size -= 2;
+				need -= 4;
+			}
+		}
+		while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
+			cookie_size -= 4;
+			need -= 4;
+		}
+		if (TCP_COOKIE_MIN <= cookie_size) {
+			opts->options |= OPTION_COOKIE_EXTENSION;
+			opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
+			opts->hash_size = cookie_size;
+
+			/* Remember for future incarnations. */
+			cvp->cookie_desired = cookie_size;
+
+			if (cvp->cookie_desired != cvp->cookie_pair_size) {
+				/* Currently use random bytes as a nonce,
+				 * assuming these are completely unpredictable
+				 * by hostile users of the same system.
+				 */
+				get_random_bytes(&cvp->cookie_pair[0],
+						 cookie_size);
+				cvp->cookie_pair_size = cookie_size;
+			}
+
+			size += need;
+		}
+	}
 	return size;
 }
 
@@ -632,7 +768,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	struct tcp_out_options opts;
 	unsigned tcp_options_size, tcp_header_size;
 	struct tcp_md5sig_key *md5;
-	__u8 *md5_hash_location;
 	struct tcphdr *th;
 	int err;
 
@@ -703,7 +838,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		}
 	}
 
-	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
 		TCP_ECN_send(sk, skb, tcp_header_size);
 
@@ -711,7 +846,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-		tp->af_specific->calc_md5_hash(md5_hash_location,
+		tp->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, sk, NULL, skb);
 	}
 #endif
@@ -2235,14 +2370,13 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct request_values *rvp)
 {
+	struct tcp_out_options opts;
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
-	int tcp_header_size;
-	struct tcp_out_options opts;
 	struct sk_buff *skb;
 	struct tcp_md5sig_key *md5;
-	__u8 *md5_hash_location;
+	int tcp_header_size;
 	int mss;
 
 	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
@@ -2303,14 +2437,14 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(min(req->rcv_wnd, 65535U));
-	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	th->doff = (tcp_header_size >> 2);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */
 	if (md5) {
-		tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location,
+		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, NULL, req, skb);
 	}
 #endif

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-16 21:08         ` Eric Dumazet
@ 2009-11-16 22:09           ` William Allen Simpson
  2009-11-16 22:26             ` Eric Dumazet
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-16 22:09 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Kernel Network Developers, Joe Perches

Eric Dumazet wrote:
> I am a bit uneasy on this patch, since apparently you have infrastructure
> to send DATA payload on SYN, but I thought it was an optional part of your 'RFC'
> and as such, being implemented later ?
> 
There is nothing yet in this patch series to send data with a SYN.  Back in
early October, David required that the various s_data and cookie structures
be compressed and consolidated.  So, for the client side, the cookie_*
fields are filled and the s_data_* fields are zero (ignored), while the
server side can have both filled.

Moreover, *this* patch does nothing other than allocate and deallocate the
structure, zero filled by kzalloc().

SYN data will be implemented (much) later.


> I remember a previous remark from David that our skb queues would not contain
> DATA on SYN packets...
> 
I haven't seen anything by David, but there's an existing comment in
tcp_input.c at the place where SYN data will be added later:

			/* Now we have several options: In theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.

As you know, there's an old Linux t/tcp patch series out there to add SYN
data.  I'm also working with my KA9Q and BSD sources.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-16 22:09           ` William Allen Simpson
@ 2009-11-16 22:26             ` Eric Dumazet
  2009-11-17  3:15               ` David Miller
                                 ` (2 more replies)
  0 siblings, 3 replies; 42+ messages in thread
From: Eric Dumazet @ 2009-11-16 22:26 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: Linux Kernel Network Developers, Joe Perches

William Allen Simpson a écrit :
> Eric Dumazet wrote:
>> I am a bit uneasy on this patch, since apparently you have infrastructure
>> to send DATA payload on SYN, but I thought it was an optional part of
>> your 'RFC'
>> and as such, being implemented later ?
>>
> There is nothing yet in this patch series to send data with a SYN.  Back in
> early October, David required that the various s_data and cookie structures
> be compressed and consolidated.  So, for the client side, the cookie_*
> fields are filled and the s_data_* fields are zero (ignored), while the
> server side can have both filled.
> 
> Moreover, *this* patch does nothing other than allocate and deallocate the
> structure, zero filled by kzalloc().
> 
> SYN data will be implemented (much) later.

okay

> 
> 
>> I remember a previous remark from David that our skb queues would not
>> contain
>> DATA on SYN packets...
>>
> I haven't seen anything by David, but there's an existing comment in
> tcp_input.c at the place where SYN data will be added later:
> 

Yep, David comment was about another netdev thread, while tracking an obscure bug

http://www.spinics.net/lists/netdev/msg110759.html

http://www.spinics.net/lists/netdev/msg110764.html


So adding DATA to SYN packets might be problematic for part of our tcp stack.


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-16 22:26             ` Eric Dumazet
@ 2009-11-17  3:15               ` David Miller
  2009-11-17 10:41                 ` William Allen Simpson
  2009-11-17 12:18                 ` Ilpo Järvinen
  2009-11-17 12:07               ` Ilpo Järvinen
  2009-11-18 14:42               ` William Allen Simpson
  2 siblings, 2 replies; 42+ messages in thread
From: David Miller @ 2009-11-17  3:15 UTC (permalink / raw)
  To: eric.dumazet; +Cc: william.allen.simpson, netdev, joe

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 16 Nov 2009 23:26:04 +0100

> So adding DATA to SYN packets might be problematic for part of our tcp stack.

I can almost guarentee it won't work.  For one thing getting a SACK
response to a SYN+DATA packet will explode quite nicely for one thing.

A lot of the other retransmit queue handling would need to be audited
as well.  So much code assumes that if we see sent data in the
retransmit queue, there won't be SYN or SYN+ACK things in there to
contend with.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-17  3:15               ` David Miller
@ 2009-11-17 10:41                 ` William Allen Simpson
  2009-11-17 12:18                 ` Ilpo Järvinen
  1 sibling, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-17 10:41 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, netdev, joe

David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 16 Nov 2009 23:26:04 +0100
> 
>> So adding DATA to SYN packets might be problematic for part of our tcp stack.
> 
But is outside the scope of anything in this part 1 patch series.

Of course, part 2 will dispense with the request_sock entirely....


> I can almost guarentee it won't work.  For one thing getting a SACK
> response to a SYN+DATA packet will explode quite nicely for one thing.
> 
Sounds like a lack of error handling to me....  Never trust that sensible
data will appear from the network!


> A lot of the other retransmit queue handling would need to be audited
> as well.  So much code assumes that if we see sent data in the
> retransmit queue, there won't be SYN or SYN+ACK things in there to
> contend with.
> 
Well then, it's a darn tootin' good thing that the code in part 1g doesn't
send SYNACK data in the retransmit queue, and the code in 1d (here) doesn't
include the SYNACK data in the tcp_minisocks.c clone.  The prominent
comment might have been a sign to the careful reviewer:

+		 * as it differs for each connection.  There is no need to
+		 * copy any s_data_payload stored at the original socket.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-16 22:26             ` Eric Dumazet
  2009-11-17  3:15               ` David Miller
@ 2009-11-17 12:07               ` Ilpo Järvinen
  2009-11-18 13:55                 ` William Allen Simpson
  2009-11-18 14:42               ` William Allen Simpson
  2 siblings, 1 reply; 42+ messages in thread
From: Ilpo Järvinen @ 2009-11-17 12:07 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: William Allen Simpson, Linux Kernel Network Developers, Joe Perches

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1077 bytes --]

On Mon, 16 Nov 2009, Eric Dumazet wrote:

> William Allen Simpson a écrit :
> > Eric Dumazet wrote:
> > 
> >> I remember a previous remark from David that our skb queues would not
> >> contain
> >> DATA on SYN packets...
> >>
> > I haven't seen anything by David, but there's an existing comment in
> > tcp_input.c at the place where SYN data will be added later:
> > 
> 
> Yep, David comment was about another netdev thread, while tracking an 
> obscure bug 
> 
> http://www.spinics.net/lists/netdev/msg110759.html
> 
> http://www.spinics.net/lists/netdev/msg110764.html
> 
> 
> So adding DATA to SYN packets might be problematic for part of our tcp 
> stack. 

You both are right (and that's what is causing confusion)... What DaveM 
said is this:

"And we're only dealing with data packets once we enter established
state, and when we enter established by definition we have unlinked
and freed up any SYN and SYN+ACK SKBs in the write queue."

We certainly can have SYN/SYNACKs in write queue but not in certain 
states, and consequently, not in certain functions.


-- 
 i.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-17  3:15               ` David Miller
  2009-11-17 10:41                 ` William Allen Simpson
@ 2009-11-17 12:18                 ` Ilpo Järvinen
  2009-11-17 12:22                   ` David Miller
  1 sibling, 1 reply; 42+ messages in thread
From: Ilpo Järvinen @ 2009-11-17 12:18 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, william.allen.simpson, Netdev, joe

On Mon, 16 Nov 2009, David Miller wrote:

> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 16 Nov 2009 23:26:04 +0100
> 
> > So adding DATA to SYN packets might be problematic for part of our tcp 
> > stack. 
> 
> I can almost guarentee it won't work.  For one thing getting a SACK
> response to a SYN+DATA packet will explode quite nicely for one thing.

Now I'm really lost??? How can you get SACKs for that in the first 
place since they are either lost or delivered in unison???

> A lot of the other retransmit queue handling would need to be audited
> as well.  So much code assumes that if we see sent data in the
> retransmit queue, there won't be SYN or SYN+ACK things in there to
> contend with.

...Valid ACKs will either remove that SYN+DATA segment completely, other 
ACK could (or should) just be dropped. I kind of fail to see what is the 
problem here.

-- 
 i.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-17 12:18                 ` Ilpo Järvinen
@ 2009-11-17 12:22                   ` David Miller
  2009-11-17 12:38                     ` Ilpo Järvinen
  0 siblings, 1 reply; 42+ messages in thread
From: David Miller @ 2009-11-17 12:22 UTC (permalink / raw)
  To: ilpo.jarvinen; +Cc: eric.dumazet, william.allen.simpson, netdev, joe

From: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
Date: Tue, 17 Nov 2009 14:18:57 +0200 (EET)

> On Mon, 16 Nov 2009, David Miller wrote:
> 
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Mon, 16 Nov 2009 23:26:04 +0100
>> 
>> > So adding DATA to SYN packets might be problematic for part of our tcp 
>> > stack. 
>> 
>> I can almost guarentee it won't work.  For one thing getting a SACK
>> response to a SYN+DATA packet will explode quite nicely for one thing.
> 
> Now I'm really lost??? How can you get SACKs for that in the first 
> place since they are either lost or delivered in unison???

Ideally, you're probably right.

However, it seems to me that the receiver can do whatever it likes
with it's receive queue when it's under memory pressure.

It can chop packets up, partially free bits, and then send a SACK
block back to you for the parts it tried to free.

If you'll recall, I wanted to put some tough restrictions into what is
allowed with SACK so that we could optimize things on the sender side.
But there was resistence and therefore we have to keep allowing all
kinds of silly situations, the one we're talking about here merely
being one of them :-)

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-17 12:22                   ` David Miller
@ 2009-11-17 12:38                     ` Ilpo Järvinen
  2009-11-17 12:48                       ` David Miller
  0 siblings, 1 reply; 42+ messages in thread
From: Ilpo Järvinen @ 2009-11-17 12:38 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, william.allen.simpson, Netdev, joe

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1402 bytes --]

On Tue, 17 Nov 2009, David Miller wrote:

> From: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
> Date: Tue, 17 Nov 2009 14:18:57 +0200 (EET)
> 
> > On Mon, 16 Nov 2009, David Miller wrote:
> > 
> >> From: Eric Dumazet <eric.dumazet@gmail.com>
> >> Date: Mon, 16 Nov 2009 23:26:04 +0100
> >> 
> >> > So adding DATA to SYN packets might be problematic for part of our tcp 
> >> > stack. 
> >> 
> >> I can almost guarentee it won't work.  For one thing getting a SACK
> >> response to a SYN+DATA packet will explode quite nicely for one thing.
> > 
> > Now I'm really lost??? How can you get SACKs for that in the first 
> > place since they are either lost or delivered in unison???
> 
> Ideally, you're probably right.
> 
> However, it seems to me that the receiver can do whatever it likes
> with it's receive queue when it's under memory pressure.
> 
> It can chop packets up, partially free bits, and then send a SACK
> block back to you for the parts it tried to free.
> 
> If you'll recall, I wanted to put some tough restrictions into what is
> allowed with SACK so that we could optimize things on the sender side.
> But there was resistence and therefore we have to keep allowing all
> kinds of silly situations, the one we're talking about here merely
> being one of them :-)

Right, but we could be just as crazy and just drop such things and keep 
resending SYN+DATA without any harm?

-- 
 i.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-17 12:38                     ` Ilpo Järvinen
@ 2009-11-17 12:48                       ` David Miller
  0 siblings, 0 replies; 42+ messages in thread
From: David Miller @ 2009-11-17 12:48 UTC (permalink / raw)
  To: ilpo.jarvinen; +Cc: eric.dumazet, william.allen.simpson, netdev, joe

From: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
Date: Tue, 17 Nov 2009 14:38:42 +0200 (EET)

> Right, but we could be just as crazy and just drop such things and
> keep resending SYN+DATA without any harm?

Two crazies == sane? :-)

Sure, we could do that.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-17 12:07               ` Ilpo Järvinen
@ 2009-11-18 13:55                 ` William Allen Simpson
  2009-11-18 14:08                   ` Ilpo Järvinen
  0 siblings, 1 reply; 42+ messages in thread
From: William Allen Simpson @ 2009-11-18 13:55 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: Eric Dumazet, Linux Kernel Network Developers, Joe Perches, David Miller

Ilpo Järvinen wrote:
> You both are right (and that's what is causing confusion)...
> 
In this case, for /this/ code, *none* of you are correct, and _that's_
causing confusion.  There is no independent retransmission of SYNACK data!
None!!  Nada!!!  There is no retransmission queue for SYNACK data.

SYN with SACK should never be sent, and should never be received -- and
already should be discarded and ignored (outside the scope of this patch).

As I've already mentioned in this thread 2 days ago, in my earlier patches
(now deferred to part 2 after the functional split requested by Eric and
Ilpo), the request_sock was removed entirely (just like syncookies).
There wasn't (and won't be) any struct or timer lying around to allow
SYNACK data retransmissions to occur.

Even now, the entire s_data_* edifice isn't passed to be retransmitted.
Note the code (in part 1a), already reviewed and Ack'd:

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4be2228..7a42990 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -537,7 +537,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
  		 * Enforce "SYN-ACK" according to figure 8, figure 6
  		 * of RFC793, fixed by RFC1122.
  		 */
-		req->rsk_ops->rtx_syn_ack(sk, req);
+		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
  		return NULL;
  	}

See that NULL?  There's no cookie data structure for retransmission!

Could we end this diversion into rampant speculation?

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-18 13:55                 ` William Allen Simpson
@ 2009-11-18 14:08                   ` Ilpo Järvinen
  0 siblings, 0 replies; 42+ messages in thread
From: Ilpo Järvinen @ 2009-11-18 14:08 UTC (permalink / raw)
  To: William Allen Simpson
  Cc: Eric Dumazet, Linux Kernel Network Developers, Joe Perches, David Miller

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1724 bytes --]

On Wed, 18 Nov 2009, William Allen Simpson wrote:

> Ilpo Järvinen wrote:
> > You both are right (and that's what is causing confusion)...
> > 
> In this case, for /this/ code, *none* of you are correct, and _that's_
> causing confusion. There is no independent retransmission of SYNACK data!
> None!!  Nada!!!  There is no retransmission queue for SYNACK data.
> 
> SYN with SACK should never be sent, and should never be received -- and
> already should be discarded and ignored (outside the scope of this patch).
> 
> As I've already mentioned in this thread 2 days ago, in my earlier patches
> (now deferred to part 2 after the functional split requested by Eric and
> Ilpo), the request_sock was removed entirely (just like syncookies).
> There wasn't (and won't be) any struct or timer lying around to allow
> SYNACK data retransmissions to occur.
> 
> Even now, the entire s_data_* edifice isn't passed to be retransmitted.
> Note the code (in part 1a), already reviewed and Ack'd:
> 
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 4be2228..7a42990 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -537,7 +537,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff
> *skb,
>  		 * Enforce "SYN-ACK" according to figure 8, figure 6
>  		 * of RFC793, fixed by RFC1122.
>  		 */
> -		req->rsk_ops->rtx_syn_ack(sk, req);
> +		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
>  		return NULL;
>  	}
> 
> See that NULL?  There's no cookie data structure for retransmission!
> 
> Could we end this diversion into rampant speculation?

...I was just commenting on the disagreement in the interpretation of what 
DaveM said vs comments you mentioned... :-)

-- 
 i.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's
  2009-11-16 22:26             ` Eric Dumazet
  2009-11-17  3:15               ` David Miller
  2009-11-17 12:07               ` Ilpo Järvinen
@ 2009-11-18 14:42               ` William Allen Simpson
  2 siblings, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-18 14:42 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Kernel Network Developers, Joe Perches, David Miller

[-- Attachment #1: Type: text/plain, Size: 1918 bytes --]

Eric Dumazet wrote:
> William Allen Simpson a écrit :
>> There is nothing yet in this patch series to send data with a SYN.  Back in
>> early October, David required that the various s_data and cookie structures
>> be compressed and consolidated.  So, for the client side, the cookie_*
>> fields are filled and the s_data_* fields are zero (ignored), while the
>> server side can have both filled.
>>
>> Moreover, *this* patch does nothing other than allocate and deallocate the
>> structure, zero filled by kzalloc().
>>
>> SYN data will be implemented (much) later.
> 
> okay
> 
Still no technical corrections.  Seeking Acks.

Trying to get review back on track.  The patch is still the same.

This patch allocates and deallocates the structures.  One main
difference (off the top of my head) from Alan's original patch is
that it uses sk->sk_allocation instead of GFP_ATOMIC in a couple of
places.  David Miller wanted to reduce the number of atomic
allocations, and sk->sk_allocation can be GFP_KERNEL or GFP_ATOMIC.

AFAICT, it's usually GFP_KERNEL in both tcp_v4_init_sock() and
tcp_v6_init_sock(), as they're called by inet_create() via
sk->sk_prot->init() shortly after calling sock_init_data() that set
sk->sk_allocation to GFP_KERNEL.

I used the existing variable just in case there are other code paths
that I've missed, and just in case there are future changes adding
code paths.

Limited multiple (=) assignments to only 2 per line for readability
(that in existing code have 4 per line and run well beyond 80
characters, triggering a warning in scripts/checkpatch.pl).

Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.


[-- Attachment #2: TCPCT+1d6++.patch --]
[-- Type: text/plain, Size: 10948 bytes --]

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index eaa3113..7fee8a4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -247,31 +247,38 @@ struct tcp_options_received {
 		sack_ok : 4,	/* SACK seen on SYN packet		*/
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
-/*	SACKs data	*/
+	u8	cookie_plus:6,	/* bytes in authenticator/cookie option	*/
+		cookie_out_never:1,
+		cookie_in_always:1;
 	u8	num_sacks;	/* Number of SACK blocks		*/
-	u16	user_mss;  	/* mss requested by user in ioctl */
+	u16	user_mss;	/* mss requested by user in ioctl	*/
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
-	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
+	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->cookie_plus = 0;
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
- * to increse this, although since:
+ * to increase this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
  * only four options will fit in a standard TCP header */
 #define TCP_NUM_SACKS 4
 
+struct tcp_cookie_values;
+struct tcp_request_sock_ops;
+
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 #ifdef CONFIG_TCP_MD5SIG
 	/* Only used by TCP MD5 Signature so far. */
 	const struct tcp_request_sock_ops *af_specific;
 #endif
-	u32			 	rcv_isn;
-	u32			 	snt_isn;
+	u32				rcv_isn;
+	u32				snt_isn;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -441,6 +448,12 @@ struct tcp_sock {
 /* TCP MD5 Signature Option information */
 	struct tcp_md5sig_info	*md5sig_info;
 #endif
+
+	/* When the cookie options are generated and exchanged, then this
+	 * object holds a reference to them (cookie_values->kref).  Also
+	 * contains related tcp_cookie_transactions fields.
+	 */
+	struct tcp_cookie_values  *cookie_values;
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -459,6 +472,10 @@ struct tcp_timewait_sock {
 	u16			  tw_md5_keylen;
 	u8			  tw_md5_key[TCP_MD5SIG_MAXKEYLEN];
 #endif
+	/* Few sockets in timewait have cookies; in that case, then this
+	 * object holds a reference to them (tw_cookie_values->kref).
+	 */
+	struct tcp_cookie_values  *tw_cookie_values;
 };
 
 static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 738b65f..f9abd9b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
+#include <linux/kref.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -164,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */
 
 /*
  *     TCP option lengths
@@ -174,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERM      2
 #define TCPOLEN_TIMESTAMP      10
 #define TCPOLEN_MD5SIG         18
+#define TCPOLEN_COOKIE_BASE    2	/* Cookie-less header extension */
+#define TCPOLEN_COOKIE_PAIR    3	/* Cookie pair header extension */
+#define TCPOLEN_COOKIE_MIN     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
+#define TCPOLEN_COOKIE_MAX     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -1482,6 +1488,83 @@ struct tcp_request_sock_ops {
 
 extern int tcp_cookie_generator(u32 *bakery);
 
+/**
+ *	struct tcp_cookie_values - each socket needs extra space for the
+ *	cookies, together with (optional) space for any SYN data.
+ *
+ *	A tcp_sock contains a pointer to the current value, and this is
+ *	cloned to the tcp_timewait_sock.
+ *
+ * @cookie_pair:	variable data from the option exchange.
+ *
+ * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
+ *			indicates default (sysctl_tcp_cookie_size).
+ *			After cookie sent, remembers size of cookie.
+ *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
+ *
+ * @s_data_desired:	user specified tcpct_s_data_desired.  When the
+ *			constant payload is specified (@s_data_constant),
+ *			holds its length instead.
+ *			Range 0 to TCP_MSS_DESIRED.
+ *
+ * @s_data_payload:	constant data that is to be included in the
+ *			payload of SYN or SYNACK segments when the
+ *			cookie option is present.
+ */
+struct tcp_cookie_values {
+	struct kref	kref;
+	u8		cookie_pair[TCP_COOKIE_PAIR_SIZE];
+	u8		cookie_pair_size;
+	u8		cookie_desired;
+	u16		s_data_desired:11,
+			s_data_constant:1,
+			s_data_in:1,
+			s_data_out:1,
+			s_data_unused:2;
+	u8		s_data_payload[0];
+};
+
+static inline void tcp_cookie_values_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct tcp_cookie_values, kref));
+}
+
+/* The length of constant payload data.  Note that s_data_desired is
+ * overloaded, depending on s_data_constant: either the length of constant
+ * data (returned here) or the limit on variable data.
+ */
+static inline int tcp_s_data_size(const struct tcp_sock *tp)
+{
+	return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
+		? tp->cookie_values->s_data_desired
+		: 0;
+}
+
+/**
+ *	struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
+ *
+ *	As tcp_request_sock has already been extended in other places, the
+ *	only remaining method is to pass stack values along as function
+ *	parameters.  These parameters are not needed after sending SYNACK.
+ *
+ * @cookie_bakery:	cryptographic secret and message workspace.
+ *
+ * @cookie_plus:	bytes in authenticator/cookie option, copied from
+ *			struct tcp_options_received (above).
+ */
+struct tcp_extend_values {
+	struct request_values		rv;
+	u32				cookie_bakery[COOKIE_WORKSPACE_WORDS];
+	u8				cookie_plus:6,
+					cookie_out_never:1,
+					cookie_in_always:1;
+};
+
+static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
+{
+	return (struct tcp_extend_values *)rvp;
+}
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 397ab8f..2bb7864 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1834,6 +1834,19 @@ static int tcp_v4_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv4_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
@@ -1887,6 +1900,13 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
+	/* TCP Cookie Transactions */
+	if (tp->cookie_values != NULL) {
+		kref_put(&tp->cookie_values->kref,
+			 tcp_cookie_values_release);
+		tp->cookie_values = NULL;
+	}
+
 	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a42990..53ef6d8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -389,14 +389,43 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		const struct inet_request_sock *ireq = inet_rsk(req);
 		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
-		struct tcp_sock *newtp;
+		struct tcp_sock *newtp = tcp_sk(newsk);
+		struct tcp_sock *oldtp = tcp_sk(sk);
+		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
+
+		/* TCP Cookie Transactions require space for the cookie pair,
+		 * as it differs for each connection.  There is no need to
+		 * copy any s_data_payload stored at the original socket.
+		 * Failure will prevent resuming the connection.
+		 *
+		 * Presumed copied, in order of appearance:
+		 *	cookie_in_always, cookie_out_never
+		 */
+		if (oldcvp != NULL) {
+			struct tcp_cookie_values *newcvp =
+				kzalloc(sizeof(*newtp->cookie_values),
+					GFP_ATOMIC);
+
+			if (newcvp != NULL) {
+				kref_init(&newcvp->kref);
+				newcvp->cookie_desired =
+						oldcvp->cookie_desired;
+				newtp->cookie_values = newcvp;
+			} else {
+				/* Not Yet Implemented */
+				newtp->cookie_values = NULL;
+			}
+		}
 
 		/* Now setup tcp_sock */
-		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
-		newtp->snd_up = treq->snt_isn + 1;
+
+		newtp->rcv_wup = newtp->copied_seq =
+		newtp->rcv_nxt = treq->rcv_isn + 1;
+
+		newtp->snd_sml = newtp->snd_una =
+		newtp->snd_nxt = newtp->snd_up =
+			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		tcp_prequeue_init(newtp);
 
@@ -429,8 +458,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->write_seq = treq->snt_isn + 1;
-		newtp->pushed_seq = newtp->write_seq;
+		newtp->write_seq = newtp->pushed_seq =
+			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -596,7 +625,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 * Invalid ACK: reset will be sent by listening socket
 	 */
 	if ((flg & TCP_FLAG_ACK) &&
-	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
+	    (TCP_SKB_CB(skb)->ack_seq !=
+	     tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
 		return sk;
 
 	/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3e327bc..973096a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1865,6 +1865,19 @@ static int tcp_v6_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv6_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS
  2009-11-13  5:10 ` [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
  2009-11-13 14:11   ` Andi Kleen
@ 2009-11-18 15:03   ` William Allen Simpson
  1 sibling, 0 replies; 42+ messages in thread
From: William Allen Simpson @ 2009-11-18 15:03 UTC (permalink / raw)
  To: Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 1127 bytes --]

William Allen Simpson wrote:
> Provide per socket control of the TCP cookie option and SYN/SYNACK data.
> 
> This is a straightforward re-implementation of an earlier (year-old)
> patch that no longer applies cleanly, with permission of the original
> author (Adam Langley):
> 
>    http://thread.gmane.org/gmane.linux.network/102586
> 
> The principle difference is using a TCP option to carry the cookie nonce,
> instead of a user configured offset in the data.
> 
> Allocations have been rearranged to avoid requiring GFP_ATOMIC.
> 
> Requires:
>   net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED
>   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
>   TCPCT part 1d: define TCP cookie option, extend existing struct's
> 
> Signed-off-by: William.Allen.Simpson@gmail.com
> ---
>  net/ipv4/tcp.c |  133 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 131 insertions(+), 2 deletions(-)
> 
Although we've had one comment, explaining that the future documentation
will be sent to another email list, there are still no technical comments.

Same patch as before.  Seeking Acks.

[-- Attachment #2: TCPCT+1e6.patch --]
[-- Type: text/plain, Size: 4406 bytes --]

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ae01bf..3424499 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2079,8 +2079,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	int val;
 	int err = 0;
 
-	/* This is a string value all the others are int's */
-	if (optname == TCP_CONGESTION) {
+	/* These are data/string values, all the others are ints */
+	switch (optname) {
+	case TCP_CONGESTION: {
 		char name[TCP_CA_NAME_MAX];
 
 		if (optlen < 1)
@@ -2097,6 +2098,93 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		release_sock(sk);
 		return err;
 	}
+	case TCP_COOKIE_TRANSACTIONS: {
+		struct tcp_cookie_transactions ctd;
+		struct tcp_cookie_values *cvp = NULL;
+
+		if (sizeof(ctd) > optlen)
+			return -EINVAL;
+		if (copy_from_user(&ctd, optval, sizeof(ctd)))
+			return -EFAULT;
+
+		if (ctd.tcpct_used > sizeof(ctd.tcpct_value)
+		 || ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
+			return -EINVAL;
+
+		if (ctd.tcpct_cookie_desired == 0) {
+			/* default to global value */
+		} else if ((0x1 & ctd.tcpct_cookie_desired)
+			|| ctd.tcpct_cookie_desired > TCP_COOKIE_MAX
+			|| ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
+			return -EINVAL;
+		}
+
+		if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
+			/* Supercedes all other values */
+			lock_sock(sk);
+			if (tp->cookie_values != NULL) {
+				kref_put(&tp->cookie_values->kref,
+					 tcp_cookie_values_release);
+				tp->cookie_values = NULL;
+			}
+			tp->rx_opt.cookie_in_always = 0; /* false */
+			tp->rx_opt.cookie_out_never = 1; /* true */
+			release_sock(sk);
+			return err;
+		}
+
+		/* Allocate ancillary memory before locking.
+		 */
+		if (ctd.tcpct_used > 0
+		 || (tp->cookie_values == NULL
+		  && (sysctl_tcp_cookie_size > 0
+		   || ctd.tcpct_cookie_desired > 0
+		   || ctd.tcpct_s_data_desired > 0))) {
+			cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
+				      GFP_KERNEL);
+			if (cvp == NULL)
+				return -ENOMEM;
+		}
+		lock_sock(sk);
+		tp->rx_opt.cookie_in_always =
+			(TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
+		tp->rx_opt.cookie_out_never = 0; /* false */
+
+		if (tp->cookie_values != NULL) {
+			if (cvp != NULL) {
+				/* Changed values are recorded by a changed
+				 * pointer, ensuring the cookie will differ,
+				 * without separately hashing each value later.
+				 */
+				kref_put(&tp->cookie_values->kref,
+					 tcp_cookie_values_release);
+				kref_init(&cvp->kref);
+				tp->cookie_values = cvp;
+			} else {
+				cvp = tp->cookie_values;
+			}
+		}
+		if (cvp != NULL) {
+			cvp->cookie_desired = ctd.tcpct_cookie_desired;
+
+			if (ctd.tcpct_used > 0) {
+				memcpy(cvp->s_data_payload, ctd.tcpct_value,
+				       ctd.tcpct_used);
+				cvp->s_data_desired = ctd.tcpct_used;
+				cvp->s_data_constant = 1; /* true */
+			} else {
+				/* No constant payload data. */
+				cvp->s_data_desired = ctd.tcpct_s_data_desired;
+				cvp->s_data_constant = 0; /* false */
+			}
+		}
+		release_sock(sk);
+		return err;
+	}
+	default:
+		/* fallthru */
+		break;
+	};
 
 	if (optlen < sizeof(int))
 		return -EINVAL;
@@ -2421,6 +2509,47 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
 			return -EFAULT;
 		return 0;
+
+	case TCP_COOKIE_TRANSACTIONS: {
+		struct tcp_cookie_transactions ctd;
+		struct tcp_cookie_values *cvp = tp->cookie_values;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+		if (len < sizeof(ctd))
+			return -EINVAL;
+
+		memset(&ctd, 0, sizeof(ctd));
+		ctd.tcpct_flags = (tp->rx_opt.cookie_in_always
+				   ? TCP_COOKIE_IN_ALWAYS : 0)
+				+ (tp->rx_opt.cookie_out_never
+				   ? TCP_COOKIE_OUT_NEVER : 0);
+
+		if (cvp != NULL) {
+			ctd.tcpct_flags += (cvp->s_data_in
+					    ? TCP_S_DATA_IN : 0)
+					 + (cvp->s_data_out
+					    ? TCP_S_DATA_OUT : 0);
+
+			ctd.tcpct_cookie_desired = cvp->cookie_desired;
+			ctd.tcpct_s_data_desired = cvp->s_data_desired;
+
+			/* Cookie(s) saved, return as nonce */
+			if (sizeof(ctd.tcpct_value) < cvp->cookie_pair_size) {
+				/* impossible? */
+				return -EINVAL;
+			}
+			memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
+			       cvp->cookie_pair_size);
+			ctd.tcpct_used = cvp->cookie_pair_size;
+		}
+
+		if (put_user(sizeof(ctd), optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &ctd, sizeof(ctd)))
+			return -EFAULT;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2009-11-18 15:03 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-13  4:03 [net-next-2.6 PATCH v6 0/7 RFC] TCPCT part 1: cookie option exchange William Allen Simpson
2009-11-13  4:07 ` [net-next-2.6 PATCH v6 1/7 RFC] TCPCT part 1a: add request_values parameter for sending SYNACK William Allen Simpson
2009-11-13  4:54   ` Ilpo Järvinen
2009-11-13  4:17 ` [net-next-2.6 PATCH v6 2/7 RFC] TCPCT part 1b: generate Responder Cookie William Allen Simpson
2009-11-13  6:21   ` Eric Dumazet
2009-11-13 14:35     ` William Allen Simpson
2009-11-13  6:26   ` Joe Perches
2009-11-13 14:51     ` William Allen Simpson
2009-11-13 18:04       ` Joe Perches
2009-11-16 14:39         ` William Allen Simpson
2009-11-16 15:34           ` Eric Dumazet
2009-11-16 20:06             ` William Allen Simpson
2009-11-13  4:31 ` [net-next-2.6 PATCH v6 3/7 RFC] TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
2009-11-13 18:37   ` Joe Perches
2009-11-13 19:45     ` William Allen Simpson
2009-11-14 15:43       ` William Allen Simpson
2009-11-16 20:40         ` William Allen Simpson
2009-11-13  4:53 ` [net-next-2.6 PATCH v6 4/7 RFC] TCPCT part 1d: define TCP cookie option, extend existing struct's William Allen Simpson
2009-11-13  6:32   ` Eric Dumazet
2009-11-13 16:06     ` William Allen Simpson
2009-11-16 20:50       ` William Allen Simpson
2009-11-16 21:08         ` Eric Dumazet
2009-11-16 22:09           ` William Allen Simpson
2009-11-16 22:26             ` Eric Dumazet
2009-11-17  3:15               ` David Miller
2009-11-17 10:41                 ` William Allen Simpson
2009-11-17 12:18                 ` Ilpo Järvinen
2009-11-17 12:22                   ` David Miller
2009-11-17 12:38                     ` Ilpo Järvinen
2009-11-17 12:48                       ` David Miller
2009-11-17 12:07               ` Ilpo Järvinen
2009-11-18 13:55                 ` William Allen Simpson
2009-11-18 14:08                   ` Ilpo Järvinen
2009-11-18 14:42               ` William Allen Simpson
2009-11-13  5:10 ` [net-next-2.6 PATCH v6 5/7 RFC] TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS William Allen Simpson
2009-11-13 14:11   ` Andi Kleen
2009-11-13 16:32     ` William Allen Simpson
2009-11-18 15:03   ` William Allen Simpson
2009-11-13  5:40 ` [net-next-2.6 PATCH v6 6/7 RFC] TCPCT part 1f: Initiator Cookie => Responder William Allen Simpson
2009-11-13 16:51   ` William Allen Simpson
2009-11-16 21:35     ` William Allen Simpson
2009-11-13  5:53 ` [net-next-2.6 PATCH v6 7/7 RFC] TCPCT part 1g: Responder Cookie => Initiator William Allen Simpson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.