All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
@ 2017-03-15 20:30 Soheil Hassas Yeganeh
  2017-03-15 20:30 ` [PATCH net-next 2/2] tcp: remove tcp_tw_recycle Soheil Hassas Yeganeh
  2017-03-15 22:40 ` [PATCH net-next 1/2] tcp: remove per-destination timestamp cache David Miller
  0 siblings, 2 replies; 14+ messages in thread
From: Soheil Hassas Yeganeh @ 2017-03-15 20:30 UTC (permalink / raw)
  To: davem, netdev
  Cc: Soheil Hassas Yeganeh, Eric Dumazet, Neal Cardwell,
	Yuchung Cheng, Lutz Vieweg, Florian Westphal

From: Soheil Hassas Yeganeh <soheil@google.com>

Commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection)
randomizes TCP timestamps per connection. After this commit,
there is no guarantee that the timestamps received from the
same destination are monotonically increasing. As a result,
the per-destination timestamp cache in TCP metrics (i.e., tcpm_ts
in struct tcp_metrics_block) is broken and cannot be relied upon.

Remove the per-destination timestamp cache and all related code
paths.

Note that this cache was already broken for caching timestamps of
multiple machines behind a NAT sharing the same address.

Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Cc: Lutz Vieweg <lvml@5t9.de>
Cc: Florian Westphal <fw@strlen.de>
---
 include/net/tcp.h        |   6 +-
 net/ipv4/tcp_input.c     |   6 +-
 net/ipv4/tcp_ipv4.c      |   4 --
 net/ipv4/tcp_metrics.c   | 147 ++---------------------------------------------
 net/ipv4/tcp_minisocks.c |  22 ++-----
 net/ipv6/tcp_ipv6.c      |   5 --
 6 files changed, 11 insertions(+), 179 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index bede8f7fa742..c81f3b958d44 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -406,11 +406,7 @@ void tcp_clear_retrans(struct tcp_sock *tp);
 void tcp_update_metrics(struct sock *sk);
 void tcp_init_metrics(struct sock *sk);
 void tcp_metrics_init(void);
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
-			bool paws_check, bool timestamps);
-bool tcp_remember_stamp(struct sock *sk);
-bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
-void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
 void tcp_disable_fack(struct tcp_sock *tp);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 96b67a8b18c3..aafec0676d3e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6342,8 +6342,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 			dst = af_ops->route_req(sk, &fl, req, &strict);
 
 			if (dst && strict &&
-			    !tcp_peer_is_proven(req, dst, true,
-						tmp_opt.saw_tstamp)) {
+			    !tcp_peer_is_proven(req, dst)) {
 				NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
 				goto drop_and_release;
 			}
@@ -6352,8 +6351,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		else if (!net->ipv4.sysctl_tcp_syncookies &&
 			 (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
 			  (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
-			 !tcp_peer_is_proven(req, dst, false,
-					     tmp_opt.saw_tstamp)) {
+			 !tcp_peer_is_proven(req, dst)) {
 			/* Without syncookies last quarter of
 			 * backlog is filled with destinations,
 			 * proven to be alive.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 08d870e45658..d8b401fff9fe 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -198,10 +198,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			tp->write_seq	   = 0;
 	}
 
-	if (tcp_death_row->sysctl_tw_recycle &&
-	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
-		tcp_fetch_timewait_stamp(sk, &rt->dst);
-
 	inet->inet_dport = usin->sin_port;
 	sk_daddr_set(sk, daddr);
 
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0f46e5fe31ad..9d0d4f39e42b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -45,8 +45,6 @@ struct tcp_metrics_block {
 	struct inetpeer_addr		tcpm_saddr;
 	struct inetpeer_addr		tcpm_daddr;
 	unsigned long			tcpm_stamp;
-	u32				tcpm_ts;
-	u32				tcpm_ts_stamp;
 	u32				tcpm_lock;
 	u32				tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
 	struct tcp_fastopen_metrics	tcpm_fastopen;
@@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
 	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
 	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
 	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
-	tm->tcpm_ts = 0;
-	tm->tcpm_ts_stamp = 0;
 	if (fastopen_clear) {
 		tm->tcpm_fastopen.mss = 0;
 		tm->tcpm_fastopen.syn_loss = 0;
@@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
 	return tm;
 }
 
-static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
-{
-	struct tcp_metrics_block *tm;
-	struct inetpeer_addr saddr, daddr;
-	unsigned int hash;
-	struct net *net;
-
-	if (tw->tw_family == AF_INET) {
-		inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
-		inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
-		hash = ipv4_addr_hash(tw->tw_daddr);
-	}
-#if IS_ENABLED(CONFIG_IPV6)
-	else if (tw->tw_family == AF_INET6) {
-		if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
-			inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
-			inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
-			hash = ipv4_addr_hash(tw->tw_daddr);
-		} else {
-			inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
-			inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
-			hash = ipv6_addr_hash(&tw->tw_v6_daddr);
-		}
-	}
-#endif
-	else
-		return NULL;
-
-	net = twsk_net(tw);
-	hash ^= net_hash_mix(net);
-	hash = hash_32(hash, tcp_metrics_hash_log);
-
-	for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
-	     tm = rcu_dereference(tm->tcpm_next)) {
-		if (addr_same(&tm->tcpm_saddr, &saddr) &&
-		    addr_same(&tm->tcpm_daddr, &daddr) &&
-		    net_eq(tm_net(tm), net))
-			break;
-	}
-	return tm;
-}
-
 static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
 						 struct dst_entry *dst,
 						 bool create)
@@ -573,8 +527,7 @@ void tcp_init_metrics(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
-			bool paws_check, bool timestamps)
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
 {
 	struct tcp_metrics_block *tm;
 	bool ret;
@@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
 
 	rcu_read_lock();
 	tm = __tcp_get_metrics_req(req, dst);
-	if (paws_check) {
-		if (tm &&
-		    (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
-		    ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
-		     !timestamps))
-			ret = false;
-		else
-			ret = true;
-	} else {
-		if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
-			ret = true;
-		else
-			ret = false;
-	}
-	rcu_read_unlock();
-
-	return ret;
-}
-
-void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
-{
-	struct tcp_metrics_block *tm;
-
-	rcu_read_lock();
-	tm = tcp_get_metrics(sk, dst, true);
-	if (tm) {
-		struct tcp_sock *tp = tcp_sk(sk);
-
-		if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
-			tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
-			tp->rx_opt.ts_recent = tm->tcpm_ts;
-		}
-	}
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
-
-/* VJ's idea. Save last timestamp seen from this destination and hold
- * it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter
- * synchronized state.
- */
-bool tcp_remember_stamp(struct sock *sk)
-{
-	struct dst_entry *dst = __sk_dst_get(sk);
-	bool ret = false;
-
-	if (dst) {
-		struct tcp_metrics_block *tm;
-
-		rcu_read_lock();
-		tm = tcp_get_metrics(sk, dst, true);
-		if (tm) {
-			struct tcp_sock *tp = tcp_sk(sk);
-
-			if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
-			    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
-			     tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
-				tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
-				tm->tcpm_ts = tp->rx_opt.ts_recent;
-			}
-			ret = true;
-		}
-		rcu_read_unlock();
-	}
-	return ret;
-}
-
-bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
-{
-	struct tcp_metrics_block *tm;
-	bool ret = false;
-
-	rcu_read_lock();
-	tm = __tcp_get_metrics_tw(tw);
-	if (tm) {
-		const struct tcp_timewait_sock *tcptw;
-		struct sock *sk = (struct sock *) tw;
-
-		tcptw = tcp_twsk(sk);
-		if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
-		    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
-		     tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
-			tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
-			tm->tcpm_ts	   = tcptw->tw_ts_recent;
-		}
+	if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
 		ret = true;
-	}
+	else
+		ret = false;
 	rcu_read_unlock();
 
 	return ret;
@@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
 			  jiffies - tm->tcpm_stamp,
 			  TCP_METRICS_ATTR_PAD) < 0)
 		goto nla_put_failure;
-	if (tm->tcpm_ts_stamp) {
-		if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
-				(s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
-			goto nla_put_failure;
-		if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
-				tm->tcpm_ts) < 0)
-			goto nla_put_failure;
-	}
 
 	{
 		int n = 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7e16243cdb58..692f974e5abe 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -94,7 +94,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 	struct tcp_options_received tmp_opt;
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	bool paws_reject = false;
-	struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
 
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
@@ -149,12 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
-		if (tcp_death_row->sysctl_tw_recycle &&
-		    tcptw->tw_ts_recent_stamp &&
-		    tcp_tw_remember_stamp(tw))
-			inet_twsk_reschedule(tw, tw->tw_timeout);
-		else
-			inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+		inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
 		return TCP_TW_ACK;
 	}
 
@@ -259,12 +253,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_timewait_sock *tw;
-	bool recycle_ok = false;
 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
-	if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
-		recycle_ok = tcp_remember_stamp(sk);
-
 	tw = inet_twsk_alloc(sk, tcp_death_row, state);
 
 	if (tw) {
@@ -317,13 +307,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		if (timeo < rto)
 			timeo = rto;
 
-		if (recycle_ok) {
-			tw->tw_timeout = rto;
-		} else {
-			tw->tw_timeout = TCP_TIMEWAIT_LEN;
-			if (state == TCP_TIME_WAIT)
-				timeo = TCP_TIMEWAIT_LEN;
-		}
+		tw->tw_timeout = TCP_TIMEWAIT_LEN;
+		if (state == TCP_TIME_WAIT)
+			timeo = TCP_TIMEWAIT_LEN;
 
 		inet_twsk_schedule(tw, timeo);
 		/* Linkage updates. */
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c73a431fd06f..853cb43e3e3c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -265,11 +265,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	sk->sk_gso_type = SKB_GSO_TCPV6;
 	ip6_dst_store(sk, dst, NULL, NULL);
 
-	if (tcp_death_row->sysctl_tw_recycle &&
-	    !tp->rx_opt.ts_recent_stamp &&
-	    ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
-		tcp_fetch_timewait_stamp(sk, dst);
-
 	icsk->icsk_ext_hdr_len = 0;
 	if (opt)
 		icsk->icsk_ext_hdr_len = opt->opt_flen +
-- 
2.12.0.367.g23dc2f6d3c-goog

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 2/2] tcp: remove tcp_tw_recycle
  2017-03-15 20:30 [PATCH net-next 1/2] tcp: remove per-destination timestamp cache Soheil Hassas Yeganeh
@ 2017-03-15 20:30 ` Soheil Hassas Yeganeh
  2017-03-15 22:40 ` [PATCH net-next 1/2] tcp: remove per-destination timestamp cache David Miller
  1 sibling, 0 replies; 14+ messages in thread
From: Soheil Hassas Yeganeh @ 2017-03-15 20:30 UTC (permalink / raw)
  To: davem, netdev
  Cc: Soheil Hassas Yeganeh, Eric Dumazet, Neal Cardwell,
	Yuchung Cheng, Lutz Vieweg, Florian Westphal

From: Soheil Hassas Yeganeh <soheil@google.com>

The tcp_tw_recycle was already broken for connections
behind NAT, since the per-destination timestamp is not
monotonically increasing for multiple machines behind
a single destination address.

After the randomization of TCP timestamp offsets
in commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets
for each connection), the tcp_tw_recycle is broken for all
types of connections for the same reason: the timestamps
received from a single machine are not monotonically increasing
anymore.

Remove tcp_tw_recycle, since it is not functional. Also, remove
the PAWSPassive SNMP counter since it is only used for
tcp_tw_recycle, and simplify tcp_v4_route_req and tcp_v6_route_req
since the strict argument is only set when tcp_tw_recycle is
enabled.

Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Cc: Lutz Vieweg <lvml@5t9.de>
Cc: Florian Westphal <fw@strlen.de>
---
 Documentation/networking/ip-sysctl.txt |  5 -----
 include/net/netns/ipv4.h               |  1 -
 include/net/tcp.h                      |  3 +--
 include/uapi/linux/snmp.h              |  1 -
 net/ipv4/proc.c                        |  1 -
 net/ipv4/sysctl_net_ipv4.c             |  7 -------
 net/ipv4/tcp_input.c                   | 30 +++++-------------------------
 net/ipv4/tcp_ipv4.c                    | 15 ++-------------
 net/ipv6/tcp_ipv6.c                    |  5 +----
 9 files changed, 9 insertions(+), 59 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ab0230461377..ed3d0791eb27 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -640,11 +640,6 @@ tcp_tso_win_divisor - INTEGER
 	building larger TSO frames.
 	Default: 3
 
-tcp_tw_recycle - BOOLEAN
-	Enable fast recycling TIME-WAIT sockets. Default value is 0.
-	It should not be changed without advice/request of technical
-	experts.
-
 tcp_tw_reuse - BOOLEAN
 	Allow to reuse TIME-WAIT sockets for new connections when it is
 	safe from protocol viewpoint. Default value is 0.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 622d2da27135..2e9d649ba169 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -33,7 +33,6 @@ struct inet_timewait_death_row {
 	atomic_t		tw_count;
 
 	struct inet_hashinfo 	*hashinfo ____cacheline_aligned_in_smp;
-	int			sysctl_tw_recycle;
 	int			sysctl_max_tw_buckets;
 };
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c81f3b958d44..e614ad4d613e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1810,8 +1810,7 @@ struct tcp_request_sock_ops {
 				 __u16 *mss);
 #endif
 	struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
-				       const struct request_sock *req,
-				       bool *strict);
+				       const struct request_sock *req);
 	__u32 (*init_seq_tsoff)(const struct sk_buff *skb, u32 *tsoff);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 3b2bed7ca9a4..cec0e171d20c 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -177,7 +177,6 @@ enum
 	LINUX_MIB_TIMEWAITED,			/* TimeWaited */
 	LINUX_MIB_TIMEWAITRECYCLED,		/* TimeWaitRecycled */
 	LINUX_MIB_TIMEWAITKILLED,		/* TimeWaitKilled */
-	LINUX_MIB_PAWSPASSIVEREJECTED,		/* PAWSPassiveRejected */
 	LINUX_MIB_PAWSACTIVEREJECTED,		/* PAWSActiveRejected */
 	LINUX_MIB_PAWSESTABREJECTED,		/* PAWSEstabRejected */
 	LINUX_MIB_DELAYEDACKS,			/* DelayedACKs */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 69cf49e8356d..4ccbf464d1ac 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED),
 	SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED),
 	SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
-	SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED),
 	SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
 	SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
 	SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d6880a6149ee..11aaef0939b2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -981,13 +981,6 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.procname	= "tcp_tw_recycle",
-		.data		= &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
 		.procname	= "tcp_max_syn_backlog",
 		.data		= &init_net.ipv4.sysctl_max_syn_backlog,
 		.maxlen		= sizeof(int),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index aafec0676d3e..bb09c7095988 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6327,31 +6327,11 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
 
 	if (!want_cookie && !isn) {
-		/* VJ's idea. We save last timestamp seen
-		 * from the destination in peer table, when entering
-		 * state TIME-WAIT, and check against it before
-		 * accepting new connection request.
-		 *
-		 * If "isn" is not zero, this request hit alive
-		 * timewait bucket, so that all the necessary checks
-		 * are made in the function processing timewait state.
-		 */
-		if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
-			bool strict;
-
-			dst = af_ops->route_req(sk, &fl, req, &strict);
-
-			if (dst && strict &&
-			    !tcp_peer_is_proven(req, dst)) {
-				NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
-				goto drop_and_release;
-			}
-		}
 		/* Kill the following clause, if you dislike this way. */
-		else if (!net->ipv4.sysctl_tcp_syncookies &&
-			 (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-			  (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
-			 !tcp_peer_is_proven(req, dst)) {
+		if (!net->ipv4.sysctl_tcp_syncookies &&
+		    (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+		     (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+		    !tcp_peer_is_proven(req, dst)) {
 			/* Without syncookies last quarter of
 			 * backlog is filled with destinations,
 			 * proven to be alive.
@@ -6367,7 +6347,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
 	}
 	if (!dst) {
-		dst = af_ops->route_req(sk, &fl, req, NULL);
+		dst = af_ops->route_req(sk, &fl, req);
 		if (!dst)
 			goto drop_and_free;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d8b401fff9fe..7482b5d11861 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1213,19 +1213,9 @@ static void tcp_v4_init_req(struct request_sock *req,
 
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
 					  struct flowi *fl,
-					  const struct request_sock *req,
-					  bool *strict)
+					  const struct request_sock *req)
 {
-	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
-
-	if (strict) {
-		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
-			*strict = true;
-		else
-			*strict = false;
-	}
-
-	return dst;
+	return inet_csk_route_req(sk, &fl->u.ip4, req);
 }
 
 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
@@ -2462,7 +2452,6 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_tw_reuse = 0;
 
 	cnt = tcp_hashinfo.ehash_mask + 1;
-	net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
 	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
 	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 853cb43e3e3c..0f08d718a002 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -722,11 +722,8 @@ static void tcp_v6_init_req(struct request_sock *req,
 
 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
 					  struct flowi *fl,
-					  const struct request_sock *req,
-					  bool *strict)
+					  const struct request_sock *req)
 {
-	if (strict)
-		*strict = true;
 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
 }
 
-- 
2.12.0.367.g23dc2f6d3c-goog

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-15 20:30 [PATCH net-next 1/2] tcp: remove per-destination timestamp cache Soheil Hassas Yeganeh
  2017-03-15 20:30 ` [PATCH net-next 2/2] tcp: remove tcp_tw_recycle Soheil Hassas Yeganeh
@ 2017-03-15 22:40 ` David Miller
  2017-03-15 22:55   ` Willy Tarreau
                     ` (2 more replies)
  1 sibling, 3 replies; 14+ messages in thread
From: David Miller @ 2017-03-15 22:40 UTC (permalink / raw)
  To: soheil.kdev; +Cc: netdev, soheil, edumazet, ncardwell, ycheng, lvml, fw

From: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>
Date: Wed, 15 Mar 2017 16:30:45 -0400

> Note that this cache was already broken for caching timestamps of
> multiple machines behind a NAT sharing the same address.

That's the documented, well established, limitation of time-wait
recycling.

People who enable it, need to consider this issue.

This limitation of the feature does not give us a reason to break the
feature even further as a matter of convenience, or to remove it
altogether for the same reason.

Please, instead, fix the bug that was introduced.

Thank you.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-15 22:40 ` [PATCH net-next 1/2] tcp: remove per-destination timestamp cache David Miller
@ 2017-03-15 22:55   ` Willy Tarreau
  2017-03-16 11:31     ` Lutz Vieweg
  2017-03-15 22:57   ` Florian Westphal
  2017-03-15 22:59   ` Eric Dumazet
  2 siblings, 1 reply; 14+ messages in thread
From: Willy Tarreau @ 2017-03-15 22:55 UTC (permalink / raw)
  To: David Miller
  Cc: soheil.kdev, netdev, soheil, edumazet, ncardwell, ycheng, lvml, fw

Hi David,

On Wed, Mar 15, 2017 at 03:40:44PM -0700, David Miller wrote:
> From: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>
> Date: Wed, 15 Mar 2017 16:30:45 -0400
> 
> > Note that this cache was already broken for caching timestamps of
> > multiple machines behind a NAT sharing the same address.
> 
> That's the documented, well established, limitation of time-wait
> recycling.
> 
> People who enable it, need to consider this issue.
> 
> This limitation of the feature does not give us a reason to break the
> feature even further as a matter of convenience, or to remove it
> altogether for the same reason.
> 
> Please, instead, fix the bug that was introduced.

At least I can say I've seen many people enable it without understanding
its impact, confusing it with tcp_tw_reuse, and copy-pasting it from
random blogs and complaining about issues in production.

I agree that it's hard to arbitrate between stupidity and flexibility
though :-/

Regards,
Willy

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-15 22:40 ` [PATCH net-next 1/2] tcp: remove per-destination timestamp cache David Miller
  2017-03-15 22:55   ` Willy Tarreau
@ 2017-03-15 22:57   ` Florian Westphal
  2017-03-15 23:45     ` David Miller
  2017-03-15 22:59   ` Eric Dumazet
  2 siblings, 1 reply; 14+ messages in thread
From: Florian Westphal @ 2017-03-15 22:57 UTC (permalink / raw)
  To: David Miller
  Cc: soheil.kdev, netdev, soheil, edumazet, ncardwell, ycheng, lvml, fw

David Miller <davem@davemloft.net> wrote:
> From: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>
> Date: Wed, 15 Mar 2017 16:30:45 -0400
> 
> > Note that this cache was already broken for caching timestamps of
> > multiple machines behind a NAT sharing the same address.
> 
> That's the documented, well established, limitation of time-wait
> recycling.

Sigh.

"don't enable this if you connect your machine to the internet".
We're not in the 1990s anymore.  Even I am behind ipv4 CG-NAT nowadays.

So I disagree and would remove this thing.

> This limitation of the feature does not give us a reason to break the
> feature even further as a matter of convenience, or to remove it
> altogether for the same reason.
> 
> Please, instead, fix the bug that was introduced.

AFAIU we only have two alternatives, removal of the randomization feature
or switch to a offset computed via hash(saddr, daddr, secret).

Unless there are more comments I'll look into doing the latter tomorrow.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-15 22:40 ` [PATCH net-next 1/2] tcp: remove per-destination timestamp cache David Miller
  2017-03-15 22:55   ` Willy Tarreau
  2017-03-15 22:57   ` Florian Westphal
@ 2017-03-15 22:59   ` Eric Dumazet
  2017-03-15 23:45     ` David Miller
  2 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2017-03-15 22:59 UTC (permalink / raw)
  To: David Miller
  Cc: soheil.kdev, netdev, soheil, edumazet, ncardwell, ycheng, lvml, fw

On Wed, 2017-03-15 at 15:40 -0700, David Miller wrote:
> From: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>
> Date: Wed, 15 Mar 2017 16:30:45 -0400
> 
> > Note that this cache was already broken for caching timestamps of
> > multiple machines behind a NAT sharing the same address.
> 
> That's the documented, well established, limitation of time-wait
> recycling.
> 
> People who enable it, need to consider this issue.
> 
> This limitation of the feature does not give us a reason to break the
> feature even further as a matter of convenience, or to remove it
> altogether for the same reason.
> 
> Please, instead, fix the bug that was introduced.
> 
> Thank you.

You mean revert Florian nice patches ?

This would kill timestamps randomization, and thus prevent some
organizations to turn TCP timestamps on.

TCP timestamps are more useful than this obscure tw_recycle thing that
is hurting innocent users.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-15 22:59   ` Eric Dumazet
@ 2017-03-15 23:45     ` David Miller
  2017-03-16  0:06       ` Eric Dumazet
  0 siblings, 1 reply; 14+ messages in thread
From: David Miller @ 2017-03-15 23:45 UTC (permalink / raw)
  To: eric.dumazet
  Cc: soheil.kdev, netdev, soheil, edumazet, ncardwell, ycheng, lvml, fw

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 15 Mar 2017 15:59:01 -0700

> On Wed, 2017-03-15 at 15:40 -0700, David Miller wrote:
>> From: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>
>> Date: Wed, 15 Mar 2017 16:30:45 -0400
>> 
>> > Note that this cache was already broken for caching timestamps of
>> > multiple machines behind a NAT sharing the same address.
>> 
>> That's the documented, well established, limitation of time-wait
>> recycling.
>> 
>> People who enable it, need to consider this issue.
>> 
>> This limitation of the feature does not give us a reason to break the
>> feature even further as a matter of convenience, or to remove it
>> altogether for the same reason.
>> 
>> Please, instead, fix the bug that was introduced.
>> 
>> Thank you.
> 
> You mean revert Florian nice patches ?
> 
> This would kill timestamps randomization, and thus prevent some
> organizations to turn TCP timestamps on.
> 
> TCP timestamps are more useful than this obscure tw_recycle thing that
> is hurting innocent users.

Ok, I guess we can remove it in that case.  I'm still a bit disappointed
as I was always hoping someone would find a way to make this work even
in the presence of NAT.

I must be too optimistic.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-15 22:57   ` Florian Westphal
@ 2017-03-15 23:45     ` David Miller
  0 siblings, 0 replies; 14+ messages in thread
From: David Miller @ 2017-03-15 23:45 UTC (permalink / raw)
  To: fw; +Cc: soheil.kdev, netdev, soheil, edumazet, ncardwell, ycheng, lvml

From: Florian Westphal <fw@strlen.de>
Date: Wed, 15 Mar 2017 23:57:26 +0100

> AFAIU we only have two alternatives, removal of the randomization feature
> or switch to a offset computed via hash(saddr, daddr, secret).
> 
> Unless there are more comments I'll look into doing the latter tomorrow.

No, I'll apply the removal.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-15 23:45     ` David Miller
@ 2017-03-16  0:06       ` Eric Dumazet
  2017-03-19  7:53         ` Alexander Alemayhu
  0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2017-03-16  0:06 UTC (permalink / raw)
  To: David Miller
  Cc: soheil.kdev, netdev, soheil, edumazet, ncardwell, ycheng, lvml, fw

On Wed, 2017-03-15 at 16:45 -0700, David Miller wrote:

> Ok, I guess we can remove it in that case.  I'm still a bit disappointed
> as I was always hoping someone would find a way to make this work even
> in the presence of NAT.

Nat are good, Nat are good.

I can't find this hilarious video we watched in Copenhagen ;)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-15 22:55   ` Willy Tarreau
@ 2017-03-16 11:31     ` Lutz Vieweg
  2017-03-16 15:40       ` Neal Cardwell
  0 siblings, 1 reply; 14+ messages in thread
From: Lutz Vieweg @ 2017-03-16 11:31 UTC (permalink / raw)
  To: Willy Tarreau, David Miller
  Cc: soheil.kdev, netdev, soheil, edumazet, ncardwell, ycheng, fw

On 03/15/2017 11:55 PM, Willy Tarreau wrote:
> At least I can say I've seen many people enable it without understanding its impact, confusing it
> with tcp_tw_reuse, and copy-pasting it from random blogs and complaining about issues in
> production.

I currently wonder: What is the correct advice to an operator who needs
to run one server instance that is meant to accept thousands of new,
short-lived TCP connections per minute?

The explanation text at
https://vincent.bernat.im/en/blog/2014-tcp-time-wait-state-linux
seems to provide comprehensive advice, but its summary is, after all,
somewhat disappointing:

> The universal solution is to increase the number of possible quadruplets by using, for example,
> more server ports. This will allow you to not exhaust the possible connections with TIME-WAIT
> entries.

Assuming an operator has to deal with a given server executable, which does not
provide a feature to "open many listening ports for the same purpose in parallel",
this is hardly an option. (Of course, if you can start just N instead of 1 server
instance, this becomes an option, but not everything is a simple, stateless web server.)

> On the server side, do not enable net.ipv4.tcp_tw_recycle unless you are pretty sure you will
> never have NAT devices in the mix. Enabling net.ipv4.tcp_tw_reuse is useless for incoming
> connections.

So basically both options won't help the server operator.

> On the client side, enabling net.ipv4.tcp_tw_reuse is another almost-safe solution. Enabling
> net.ipv4.tcp_tw_recycle in addition to net.ipv4.tcp_tw_reuse is mostly useless.

If you just operate the server, but not the (remote) clients, this is not relevant.


Is the final verdict that unless a server software by itself offers to open
N listen ports for the same purpose, there is no solution?

Regards,

Lutz Vieweg

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-16 11:31     ` Lutz Vieweg
@ 2017-03-16 15:40       ` Neal Cardwell
  2017-03-16 16:05         ` Willy Tarreau
  2017-03-16 17:30         ` Lutz Vieweg
  0 siblings, 2 replies; 14+ messages in thread
From: Neal Cardwell @ 2017-03-16 15:40 UTC (permalink / raw)
  To: Lutz Vieweg
  Cc: Willy Tarreau, David Miller, Soheil Hassas Yeganeh, Netdev,
	Soheil Hassas Yeganeh, Eric Dumazet, Yuchung Cheng,
	Florian Westphal

On Thu, Mar 16, 2017 at 7:31 AM, Lutz Vieweg <lvml@5t9.de> wrote:
>
> On 03/15/2017 11:55 PM, Willy Tarreau wrote:
>>
>> At least I can say I've seen many people enable it without understanding its impact, confusing it
>> with tcp_tw_reuse, and copy-pasting it from random blogs and complaining about issues in
>> production.
>
>
> I currently wonder: What is the correct advice to an operator who needs
> to run one server instance that is meant to accept thousands of new,
> short-lived TCP connections per minute?

Note that for this to be a problem there would have to be thousands of
new, short-lived TCP connections per minute from a single source IP
address to a single destination IP address. Normal client software
should not be doing this. AFAIK this is pretty rare, unless someone is
running a load test or has an overly-aggressive monitoring system. NAT
boxes or proxies with that kind of traffic should be running with
multiple public source IPs.

But if/when the problem occurs, then the feasible solutions I'm aware
of, in approximate descending order of preference, are:

(1) use longer connections from the client side (browsers and RPC libraries are
    usually pretty good about keeping connections open for a long time, so this
    is usually sufficient)

(2) have the client do the close(), so the client is the side to carry the
    TIME_WAIT state

(3) have the server use SO_LINGER with a timeout of 0, so that
    the connection is closed with a RST and the server carries no
    TIME_WAIT state

neal

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-16 15:40       ` Neal Cardwell
@ 2017-03-16 16:05         ` Willy Tarreau
  2017-03-16 17:30         ` Lutz Vieweg
  1 sibling, 0 replies; 14+ messages in thread
From: Willy Tarreau @ 2017-03-16 16:05 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: Lutz Vieweg, David Miller, Soheil Hassas Yeganeh, Netdev,
	Soheil Hassas Yeganeh, Eric Dumazet, Yuchung Cheng,
	Florian Westphal

Hi Neal,

On Thu, Mar 16, 2017 at 11:40:52AM -0400, Neal Cardwell wrote:
> On Thu, Mar 16, 2017 at 7:31 AM, Lutz Vieweg <lvml@5t9.de> wrote:
> >
> > On 03/15/2017 11:55 PM, Willy Tarreau wrote:
> >>
> >> At least I can say I've seen many people enable it without understanding its impact, confusing it
> >> with tcp_tw_reuse, and copy-pasting it from random blogs and complaining about issues in
> >> production.
> >
> >
> > I currently wonder: What is the correct advice to an operator who needs
> > to run one server instance that is meant to accept thousands of new,
> > short-lived TCP connections per minute?
> 
> Note that for this to be a problem there would have to be thousands of
> new, short-lived TCP connections per minute from a single source IP
> address to a single destination IP address. Normal client software
> should not be doing this. AFAIK this is pretty rare, unless someone is
> running a load test or has an overly-aggressive monitoring system. NAT
> boxes or proxies with that kind of traffic should be running with
> multiple public source IPs.

In fact it's the regular stuff with reverse-proxies. You can scan the
whole source port range every second. But when enabling timestamps, you
benefit from PAWS and you don't have any problem anymore, everything
works pretty well.

> But if/when the problem occurs, then the feasible solutions I'm aware
> of, in approximate descending order of preference, are:
> 
> (1) use longer connections from the client side (browsers and RPC libraries are
>     usually pretty good about keeping connections open for a long time, so this
>     is usually sufficient)
> 
> (2) have the client do the close(), so the client is the side to carry the
>     TIME_WAIT state

That's impossible for proxies, as you can't connect again from the same
source port, causing the performance to be divided by more than 100. What
proxies have to do when they're forced to close an outgoing connection first
is to set SO_LINGER to (0,0) so that an RST is used and the source port can
be reused. But as you guess, if that RST gets lost, then the next opening is
not that beautiful: either [SYN, ACK, RST, pause, SYN, SYN-ACK, ACK] or
[SYN, RST, pause, SYN, SYN-ACK, ACK] depending on whether the SYN appears
in the previous window or not.

> (3) have the server use SO_LINGER with a timeout of 0, so that
>     the connection is closed with a RST and the server carries no
>     TIME_WAIT state

The problem is that it also kills the tail data.

Quite frankly, the only issues I'm used to see are with clients closing
first and with reusing source connections. As soon as timestamps are
enabled on both sides and people don't blindly play with tcp_tw_recycle,
I really never face any connection issue.

Willy

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-16 15:40       ` Neal Cardwell
  2017-03-16 16:05         ` Willy Tarreau
@ 2017-03-16 17:30         ` Lutz Vieweg
  1 sibling, 0 replies; 14+ messages in thread
From: Lutz Vieweg @ 2017-03-16 17:30 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: Willy Tarreau, David Miller, Soheil Hassas Yeganeh, Netdev,
	Soheil Hassas Yeganeh, Eric Dumazet, Yuchung Cheng,
	Florian Westphal

On 03/16/2017 04:40 PM, Neal Cardwell wrote:
>> I currently wonder: What is the correct advice to an operator who needs
>> to run one server instance that is meant to accept thousands of new,
>> short-lived TCP connections per minute?
>
> Note that for this to be a problem there would have to be thousands of
> new, short-lived TCP connections per minute from a single source IP
> address to a single destination IP address. Normal client software
> should not be doing this. AFAIK this is pretty rare, unless someone is
> running a load test or has an overly-aggressive monitoring system.

Indeed, I meanwhile found that a load/regression test scenario had
been the rationale for the tcp_tw_recycle = 1 setting - when a
recorded log of hundreds of thousands of connections (each placing
one or a few requests) was replayed, this failed due to an excessive
number of TIME_WAIT state connections.

Do I understand correctly that "tcp_tw_recycle = 1" is fine
in such a scenario as one can be sure both client and server
are at fixed, not-NATed IP addresses?

I wonder whether there might be a possibility to limit the use
of "tcp_tw_recycle = 1" to either a certain address or listen-port range?

If not, I guess our best option at this time is to advise
enabling "tcp_tw_recycle = 1" only while explicitly performing
local load/regression tests, and to disable it otherwise.
(This, however, means that running both automated continuous integration
tests and any services for remote clients on the same system
would not mix well, as the setting could be "right" for only one
of them.)


> (1) use longer connections from the client side

Sure, in cases where that is under our control, we do exactly that.

> (2) have the client do the close(), so the client is the side to carry the
>      TIME_WAIT state

In the load/regression test scenario, we are both server and client,
so I guess this would not help.

> (3) have the server use SO_LINGER with a timeout of 0, so that
>      the connection is closed with a RST and the server carries no
>      TIME_WAIT state

Potentially losing the end of some conversation is not really
an option for most protocols / use cases.

Regards,

Lutz Vieweg

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 1/2] tcp: remove per-destination timestamp cache
  2017-03-16  0:06       ` Eric Dumazet
@ 2017-03-19  7:53         ` Alexander Alemayhu
  0 siblings, 0 replies; 14+ messages in thread
From: Alexander Alemayhu @ 2017-03-19  7:53 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, soheil.kdev, netdev, soheil, edumazet, ncardwell,
	ycheng, lvml, fw

On Wed, Mar 15, 2017 at 05:06:58PM -0700, Eric Dumazet wrote:
> 
> Nat are good, Nat are good.
> 
> I can't find this hilarious video we watched in Copenhagen ;)
> 
Maybe 'Oops, I did it: IPv6 NAT by Patrick McHardy'[0]. Starts around
19:10.

[0]: http://video.dkuug.dk/media/oops-i-did-it-ipv6-nat-by-patrick-mchardy

-- 
Mit freundlichen Grüßen

Alexander Alemayhu

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-03-19  7:54 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-15 20:30 [PATCH net-next 1/2] tcp: remove per-destination timestamp cache Soheil Hassas Yeganeh
2017-03-15 20:30 ` [PATCH net-next 2/2] tcp: remove tcp_tw_recycle Soheil Hassas Yeganeh
2017-03-15 22:40 ` [PATCH net-next 1/2] tcp: remove per-destination timestamp cache David Miller
2017-03-15 22:55   ` Willy Tarreau
2017-03-16 11:31     ` Lutz Vieweg
2017-03-16 15:40       ` Neal Cardwell
2017-03-16 16:05         ` Willy Tarreau
2017-03-16 17:30         ` Lutz Vieweg
2017-03-15 22:57   ` Florian Westphal
2017-03-15 23:45     ` David Miller
2017-03-15 22:59   ` Eric Dumazet
2017-03-15 23:45     ` David Miller
2017-03-16  0:06       ` Eric Dumazet
2017-03-19  7:53         ` Alexander Alemayhu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.