All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/5] soreuseport: TCP/IPv4 implementation
@ 2013-01-22 19:50 Tom Herbert
  2013-01-25  8:08 ` Steffen Klassert
  0 siblings, 1 reply; 9+ messages in thread
From: Tom Herbert @ 2013-01-22 19:50 UTC (permalink / raw)
  To: netdev, davem; +Cc: netdev, eric.dumazet

Allow multiple listener sockets to bind to the same port.

Motivation for soreuseport would be something like a web server
binding to port 80 running with multiple threads, where each thread
might have its own listener socket.  This could be done as an
alternative to other models: 1) have one listener thread which
dispatches completed connections to workers. 2) accept on a single
listener socket from multiple threads.  In case #1 the listener thread
can easily become the bottleneck with high connection turn-over rate.
In case #2, the proportion of connections accepted per thread tends
to be uneven under high connection load (assuming simple event loop:
while (1) { accept(); process() }); wakeup does not promote fairness
among the sockets.  We have seen the disproportion to be as high
as a 3:1 ratio between the thread accepting the most connections and
the one accepting the fewest.  With so_reuseport the distribution is
uniform.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/net/inet_hashtables.h          |   13 ++++++--
 include/net/netfilter/nf_tproxy_core.h |    1 +
 net/ipv4/inet_connection_sock.c        |   48 ++++++++++++++++++++++++-------
 net/ipv4/inet_hashtables.c             |   28 ++++++++++++++----
 net/ipv4/tcp_ipv4.c                    |    4 ++-
 5 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 67a8fa0..7b2ae9d 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -81,7 +81,9 @@ struct inet_bind_bucket {
 	struct net		*ib_net;
 #endif
 	unsigned short		port;
-	signed short		fastreuse;
+	signed char		fastreuse;
+	signed char		fastreuseport;
+	kuid_t			fastuid;
 	int			num_owners;
 	struct hlist_node	node;
 	struct hlist_head	owners;
@@ -257,15 +259,19 @@ extern void inet_unhash(struct sock *sk);
 
 extern struct sock *__inet_lookup_listener(struct net *net,
 					   struct inet_hashinfo *hashinfo,
+					   const __be32 saddr,
+					   const __be16 sport,
 					   const __be32 daddr,
 					   const unsigned short hnum,
 					   const int dif);
 
 static inline struct sock *inet_lookup_listener(struct net *net,
 		struct inet_hashinfo *hashinfo,
+		__be32 saddr, __be16 sport,
 		__be32 daddr, __be16 dport, int dif)
 {
-	return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif);
+	return __inet_lookup_listener(net, hashinfo, saddr, sport,
+				      daddr, ntohs(dport), dif);
 }
 
 /* Socket demux engine toys. */
@@ -358,7 +364,8 @@ static inline struct sock *__inet_lookup(struct net *net,
 	struct sock *sk = __inet_lookup_established(net, hashinfo,
 				saddr, sport, daddr, hnum, dif);
 
-	return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif);
+	return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport,
+					     daddr, hnum, dif);
 }
 
 static inline struct sock *inet_lookup(struct net *net,
diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h
index 75ca929..1937964 100644
--- a/include/net/netfilter/nf_tproxy_core.h
+++ b/include/net/netfilter/nf_tproxy_core.h
@@ -82,6 +82,7 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
 			break;
 		case NFT_LOOKUP_LISTENER:
 			sk = inet_lookup_listener(net, &tcp_hashinfo,
+						    saddr, sport,
 						    daddr, dport,
 						    in->ifindex);
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d0670f0..8bb623d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -59,6 +59,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
 	struct sock *sk2;
 	struct hlist_node *node;
 	int reuse = sk->sk_reuse;
+	int reuseport = sk->sk_reuseport;
+	kuid_t uid = sock_i_uid((struct sock *)sk);
 
 	/*
 	 * Unlike other sk lookup places we do not check
@@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk,
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
-			if (!reuse || !sk2->sk_reuse ||
-			    sk2->sk_state == TCP_LISTEN) {
+			if ((!reuse || !sk2->sk_reuse ||
+			    sk2->sk_state == TCP_LISTEN) &&
+			    (!reuseport || !sk2->sk_reuseport ||
+			    (sk2->sk_state != TCP_TIME_WAIT &&
+			     !uid_eq(uid, sock_i_uid(sk2))))) {
 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
 				    sk2_rcv_saddr == sk_rcv_saddr(sk))
@@ -106,6 +111,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 	int ret, attempts = 5;
 	struct net *net = sock_net(sk);
 	int smallest_size = -1, smallest_rover;
+	kuid_t uid = sock_i_uid(sk);
 
 	local_bh_disable();
 	if (!snum) {
@@ -125,9 +131,12 @@ again:
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
 				if (net_eq(ib_net(tb), net) && tb->port == rover) {
-					if (tb->fastreuse > 0 &&
-					    sk->sk_reuse &&
-					    sk->sk_state != TCP_LISTEN &&
+					if (((tb->fastreuse > 0 &&
+					      sk->sk_reuse &&
+					      sk->sk_state != TCP_LISTEN) ||
+					     (tb->fastreuseport > 0 &&
+					      sk->sk_reuseport &&
+					      uid_eq(tb->fastuid, uid))) &&
 					    (tb->num_owners < smallest_size || smallest_size == -1)) {
 						smallest_size = tb->num_owners;
 						smallest_rover = rover;
@@ -185,14 +194,17 @@ tb_found:
 		if (sk->sk_reuse == SK_FORCE_REUSE)
 			goto success;
 
-		if (tb->fastreuse > 0 &&
-		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
+		if (((tb->fastreuse > 0 &&
+		      sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
+		     (tb->fastreuseport > 0 &&
+		      sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
 		    smallest_size == -1) {
 			goto success;
 		} else {
 			ret = 1;
 			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
-				if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
+				if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
+				     (sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
 				    smallest_size != -1 && --attempts >= 0) {
 					spin_unlock(&head->lock);
 					goto again;
@@ -212,9 +224,23 @@ tb_not_found:
 			tb->fastreuse = 1;
 		else
 			tb->fastreuse = 0;
-	} else if (tb->fastreuse &&
-		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
-		tb->fastreuse = 0;
+		if (sk->sk_reuseport) {
+			tb->fastreuseport = 1;
+			tb->fastuid = uid;
+		} else {
+			tb->fastreuseport = 0;
+			tb->fastuid = 0;
+		}
+	} else {
+		if (tb->fastreuse &&
+		    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
+			tb->fastreuse = 0;
+		if (tb->fastreuseport &&
+		    (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) {
+			tb->fastreuseport = 0;
+			tb->fastuid = 0;
+		}
+	}
 success:
 	if (!inet_csk(sk)->icsk_bind_hash)
 		inet_bind_hash(sk, tb, snum);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fa3ae81..0ce0595 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -39,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 		write_pnet(&tb->ib_net, hold_net(net));
 		tb->port      = snum;
 		tb->fastreuse = 0;
+		tb->fastreuseport = 0;
 		tb->num_owners = 0;
 		INIT_HLIST_HEAD(&tb->owners);
 		hlist_add_head(&tb->node, &head->chain);
@@ -151,16 +152,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
 			!ipv6_only_sock(sk)) {
 		__be32 rcv_saddr = inet->inet_rcv_saddr;
-		score = sk->sk_family == PF_INET ? 1 : 0;
+		score = sk->sk_family == PF_INET ? 2 : 1;
 		if (rcv_saddr) {
 			if (rcv_saddr != daddr)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 		if (sk->sk_bound_dev_if) {
 			if (sk->sk_bound_dev_if != dif)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 	}
 	return score;
@@ -176,6 +177,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
 
 struct sock *__inet_lookup_listener(struct net *net,
 				    struct inet_hashinfo *hashinfo,
+				    const __be32 saddr, __be16 sport,
 				    const __be32 daddr, const unsigned short hnum,
 				    const int dif)
 {
@@ -183,17 +185,29 @@ struct sock *__inet_lookup_listener(struct net *net,
 	struct hlist_nulls_node *node;
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
-	int score, hiscore;
+	int score, hiscore, matches = 0, reuseport = 0;
+	u32 phash = 0;
 
 	rcu_read_lock();
 begin:
 	result = NULL;
-	hiscore = -1;
+	hiscore = 0;
 	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
 		score = compute_score(sk, net, hnum, daddr, dif);
 		if (score > hiscore) {
 			result = sk;
 			hiscore = score;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				phash = inet_ehashfn(net, daddr, hnum,
+						     saddr, sport);
+				matches = 1;
+			}
+		} else if (score == hiscore && reuseport) {
+			matches++;
+			if (((u64)phash * matches) >> 32 == 0)
+				result = sk;
+			phash = next_pseudo_random32(phash);
 		}
 	}
 	/*
@@ -501,7 +515,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
 				if (net_eq(ib_net(tb), net) &&
 				    tb->port == port) {
-					if (tb->fastreuse >= 0)
+					if (tb->fastreuse >= 0 ||
+					    tb->fastreuseport >= 0)
 						goto next_port;
 					WARN_ON(hlist_empty(&tb->owners));
 					if (!check_established(death_row, sk,
@@ -518,6 +533,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 				break;
 			}
 			tb->fastreuse = -1;
+			tb->fastreuseport = -1;
 			goto ok;
 
 		next_port:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c6ce9ca..bbbdcc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 		 * no RST generated if md5 hash doesn't match.
 		 */
 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
-					     &tcp_hashinfo, ip_hdr(skb)->daddr,
+					     &tcp_hashinfo, ip_hdr(skb)->saddr,
+					     th->source, ip_hdr(skb)->daddr,
 					     ntohs(th->source), inet_iif(skb));
 		/* don't send rst if it can't find key */
 		if (!sk1)
@@ -2074,6 +2075,7 @@ do_time_wait:
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
 							&tcp_hashinfo,
+							iph->saddr, th->source,
 							iph->daddr, th->dest,
 							inet_iif(skb));
 		if (sk2) {
-- 
1.7.7.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/5] soreuseport: TCP/IPv4 implementation
  2013-01-22 19:50 [PATCH 2/5] soreuseport: TCP/IPv4 implementation Tom Herbert
@ 2013-01-25  8:08 ` Steffen Klassert
  2013-01-26  3:40   ` Tom Herbert
  0 siblings, 1 reply; 9+ messages in thread
From: Steffen Klassert @ 2013-01-25  8:08 UTC (permalink / raw)
  To: Tom Herbert; +Cc: netdev, davem, netdev, eric.dumazet

On Tue, Jan 22, 2013 at 11:50:24AM -0800, Tom Herbert wrote:
> -	} else if (tb->fastreuse &&
> -		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
> -		tb->fastreuse = 0;
> +		if (sk->sk_reuseport) {
> +			tb->fastreuseport = 1;
> +			tb->fastuid = uid;
> +		} else {
> +			tb->fastreuseport = 0;
> +			tb->fastuid = 0;
> +		}
> +	} else {
> +		if (tb->fastreuse &&
> +		    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
> +			tb->fastreuse = 0;
> +		if (tb->fastreuseport &&
> +		    (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) {
> +			tb->fastreuseport = 0;
> +			tb->fastuid = 0;
> +		}

I'm getting the following compile error due to the 0 assignment
to tb->fastuid:

net/ipv4/inet_connection_sock.c: In function ‘inet_csk_get_port’:
net/ipv4/inet_connection_sock.c:232:16: error: incompatible types when assigning to type ‘kuid_t’ from type ‘int’
net/ipv4/inet_connection_sock.c:241:16: error: incompatible types when assigning to type ‘kuid_t’ from type ‘int’
  CC      lib/show_mem.o
  CC      net/ipv6/ipv6_sockglue.o
make[3]: *** [net/ipv4/inet_connection_sock.o] Error 1
make[2]: *** [net/ipv4] Error 2

I have not seen this reported so far, which surprises me a bit.
This is net-next from today compiled with
gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/5] soreuseport: TCP/IPv4 implementation
  2013-01-25  8:08 ` Steffen Klassert
@ 2013-01-26  3:40   ` Tom Herbert
  0 siblings, 0 replies; 9+ messages in thread
From: Tom Herbert @ 2013-01-26  3:40 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: netdev, davem, netdev, eric.dumazet

Thanks Steffen.  I'm looking at it.

Tom

On Fri, Jan 25, 2013 at 12:08 AM, Steffen Klassert
<steffen.klassert@secunet.com> wrote:
> On Tue, Jan 22, 2013 at 11:50:24AM -0800, Tom Herbert wrote:
>> -     } else if (tb->fastreuse &&
>> -                (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
>> -             tb->fastreuse = 0;
>> +             if (sk->sk_reuseport) {
>> +                     tb->fastreuseport = 1;
>> +                     tb->fastuid = uid;
>> +             } else {
>> +                     tb->fastreuseport = 0;
>> +                     tb->fastuid = 0;
>> +             }
>> +     } else {
>> +             if (tb->fastreuse &&
>> +                 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
>> +                     tb->fastreuse = 0;
>> +             if (tb->fastreuseport &&
>> +                 (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) {
>> +                     tb->fastreuseport = 0;
>> +                     tb->fastuid = 0;
>> +             }
>
> I'm getting the following compile error due to the 0 assignment
> to tb->fastuid:
>
> net/ipv4/inet_connection_sock.c: In function ‘inet_csk_get_port’:
> net/ipv4/inet_connection_sock.c:232:16: error: incompatible types when assigning to type ‘kuid_t’ from type ‘int’
> net/ipv4/inet_connection_sock.c:241:16: error: incompatible types when assigning to type ‘kuid_t’ from type ‘int’
>   CC      lib/show_mem.o
>   CC      net/ipv6/ipv6_sockglue.o
> make[3]: *** [net/ipv4/inet_connection_sock.o] Error 1
> make[2]: *** [net/ipv4] Error 2
>
> I have not seen this reported so far, which surprises me a bit.
> This is net-next from today compiled with
> gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/5] soreuseport: TCP/IPv4 implementation
  2013-01-16 21:30   ` Tom Herbert
@ 2013-01-16 22:14     ` Vijay Subramanian
  0 siblings, 0 replies; 9+ messages in thread
From: Vijay Subramanian @ 2013-01-16 22:14 UTC (permalink / raw)
  To: Tom Herbert; +Cc: netdev, davem, netdev, eric.dumazet

On 16 January 2013 13:30, Tom Herbert <therbert@google.com> wrote:
> I'll send updated patches today fixing the issues raised so far.  You
> might want to put these on top of those?
>

Apologies for letting the mailer mangle the patch. I think I have fixed it now.
Instead of resending this version, I will wait till your patches are
applied and send a version on top of those.

Thanks,
Vijay

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/5] soreuseport: TCP/IPv4 implementation
  2013-01-16 21:09 ` Vijay Subramanian
  2013-01-16 21:10   ` David Miller
@ 2013-01-16 21:30   ` Tom Herbert
  2013-01-16 22:14     ` Vijay Subramanian
  1 sibling, 1 reply; 9+ messages in thread
From: Tom Herbert @ 2013-01-16 21:30 UTC (permalink / raw)
  To: Vijay Subramanian; +Cc: netdev, davem, netdev, eric.dumazet

I'll send updated patches today fixing the issues raised so far.  You
might want to put these on top of those?

On Wed, Jan 16, 2013 at 1:09 PM, Vijay Subramanian
<subramanian.vijay@gmail.com> wrote:
>
>
>>          * Unlike other sk lookup places we do not check
>> @@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk,
>>                     (!sk->sk_bound_dev_if ||
>>                      !sk2->sk_bound_dev_if ||
>>                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
>> -                       if (!reuse || !sk2->sk_reuse ||
>> -                           sk2->sk_state == TCP_LISTEN) {
>> +                       if ((!reuse || !sk2->sk_reuse ||
>> +                           sk2->sk_state == TCP_LISTEN) &&
>> +                           (!reuseport || !sk2->sk_reuseport ||
>> +                           (sk2->sk_state != TCP_TIME_WAIT &&
>> +                            uid != sock_i_uid(sk2)))) {
>>                                 const __be32 sk2_rcv_saddr =
>> sk_rcv_saddr(sk2);
>>                                 if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
>>                                     sk2_rcv_saddr == sk_rcv_saddr(sk))
>
>
>
> How about introducing some helper functions to make inet_csk_bind_conflict()
> and inet6_csk_bind_conflict() more readable, as in the patch below? We can add
> another test for reuseport() for soreuseport patches.
>
> udp.c already has ipv4_rcv_saddr_equal(), but it seems to call
> ipv6_only_sock() and not inet_v6_ipv6only(), which is needed in
> inet{6}_csk_bind_conflict(). So I added sk_rcv_saddr_equal().
>
> Also the bind_conflict functions can return bool instead of int (not
> implemented in patch below).
>
>
> If patch idea below is ok, I will send it officially.
>
> Thanks,
> Vijay
>
>
> diff --git a/include/net/inet_connection_sock.h
> b/include/net/inet_connection_sock.h
> index 1832927..c15d2eb 100644
> --- a/include/net/inet_connection_sock.h
> +++ b/include/net/inet_connection_sock.h
> @@ -22,6 +22,8 @@
>
>  #include <net/inet_sock.h>
>  #include <net/request_sock.h>
> +#include <net/tcp_states.h>
> +#include <net/inet_timewait_sock.h>
>
>  #define INET_CSK_DEBUG 1
>
> @@ -205,6 +207,37 @@ static inline void inet_csk_clear_xmit_timer(struct
> sock *sk, const int what)
>  #endif
>  }
>
> +/* The port cannot be reused if the older socket is in LISTEN state or if
> + * either the old or new one does not allow reuse
> + */
> +static inline bool sk_reuse_equal(int reuse, const struct sock *sk2)
> +{
> +       return !reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN;
> +}
> +
> +/* The port cannot be reused if both sockets are bound to the same device
> or
> + * if either one is not bound
> + */
> +static inline bool sk_bound_dev_equal(const struct sock *sk,
> +                                     const struct sock *sk2)
> +{
> +       return !sk->sk_bound_dev_if || !sk2->sk_bound_dev_if ||
> +              sk->sk_bound_dev_if == sk2->sk_bound_dev_if;
> +}
> +
> +/* The port cannot be reused if both sockets have the same rcv_saddr
> + * or if either rcv_saddr is NULL
> + */
> +static inline bool sk_rcv_saddr_equal(const struct sock *sk1,
> +                                     const struct sock *sk2)
> +{
> +       __be32 sk1_rcv_saddr = sk_rcv_saddr(sk1);
> +       __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
> +
> +       return !sk2_rcv_saddr || !sk1_rcv_saddr ||
> +              sk2_rcv_saddr == sk1_rcv_saddr;
> +}
> +
>  /*
>   *     Reset the retransmission timer
>   */
> diff --git a/net/ipv4/inet_connection_sock.c
> b/net/ipv4/inet_connection_sock.c
> index d0670f0..375cca3 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -68,26 +68,15 @@ int inet_csk_bind_conflict(const struct sock *sk,
>          */
>
>         sk_for_each_bound(sk2, node, &tb->owners) {
> -               if (sk != sk2 &&
> -                   !inet_v6_ipv6only(sk2) &&
> -                   (!sk->sk_bound_dev_if ||
> -                    !sk2->sk_bound_dev_if ||
>
> -                    sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
> -                       if (!reuse || !sk2->sk_reuse ||
> -                           sk2->sk_state == TCP_LISTEN) {
> -                               const __be32 sk2_rcv_saddr =
> sk_rcv_saddr(sk2);
> -                               if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
> -                                   sk2_rcv_saddr == sk_rcv_saddr(sk))
> -                                       break;
> -                       }
> -                       if (!relax && reuse && sk2->sk_reuse &&
> -                           sk2->sk_state != TCP_LISTEN) {
> -                               const __be32 sk2_rcv_saddr =
> sk_rcv_saddr(sk2);
> +               if (sk != sk2 && !inet_v6_ipv6only(sk2) &&
> +                   sk_bound_dev_equal(sk, sk2)) {
> +                       if (sk_reuse_equal(reuse, sk2) &&
> +                           sk_rcv_saddr_equal(sk, sk2))
> +                               break;
>
> -                               if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
> -                                   sk2_rcv_saddr == sk_rcv_saddr(sk))
> -                                       break;
> -                       }
> +                       if (!relax && sk_reuse_equal(reuse, sk2) &&
> +                           sk_rcv_saddr_equal(sk, sk2))
> +                               break;
>                 }
>         }
>         return node != NULL;
> diff --git a/net/ipv6/inet6_connection_sock.c
> b/net/ipv6/inet6_connection_sock.c
> index 3064785..8ebe20d 100644
> --- a/net/ipv6/inet6_connection_sock.c
> +++ b/net/ipv6/inet6_connection_sock.c
> @@ -39,13 +39,9 @@ int inet6_csk_bind_conflict(const struct sock *sk,
>          * vs net namespaces issues.
>          */
>         sk_for_each_bound(sk2, node, &tb->owners) {
> -               if (sk != sk2 &&
> -                   (!sk->sk_bound_dev_if ||
> -                    !sk2->sk_bound_dev_if ||
> -                    sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
> -                   (!sk->sk_reuse || !sk2->sk_reuse ||
> -                    sk2->sk_state == TCP_LISTEN) &&
> -                    ipv6_rcv_saddr_equal(sk, sk2))
> +               if (sk != sk2 && sk_bound_dev_equal(sk, sk2) &&
> +                   sk_reuse_equal(sk->sk_reuse, sk2) &&
> +                   ipv6_rcv_saddr_equal(sk, sk2))
>                         break;
>         }
>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/5] soreuseport: TCP/IPv4 implementation
  2013-01-16 21:09 ` Vijay Subramanian
@ 2013-01-16 21:10   ` David Miller
  2013-01-16 21:30   ` Tom Herbert
  1 sibling, 0 replies; 9+ messages in thread
From: David Miller @ 2013-01-16 21:10 UTC (permalink / raw)
  To: subramanian.vijay; +Cc: therbert, netdev, netdev, eric.dumazet

From: Vijay Subramanian <subramanian.vijay@gmail.com>
Date: Wed, 16 Jan 2013 13:09:10 -0800 (PST)

> 
> +/* The port cannot be reused if the older socket is in LISTEN state
> or if
> + * either the old or new one does not allow reuse
> + */

Please post patches without them being destroyed by your mailer.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/5] soreuseport: TCP/IPv4 implementation
  2013-01-14 20:00 Tom Herbert
  2013-01-14 20:20 ` YOSHIFUJI Hideaki
@ 2013-01-16 21:09 ` Vijay Subramanian
  2013-01-16 21:10   ` David Miller
  2013-01-16 21:30   ` Tom Herbert
  1 sibling, 2 replies; 9+ messages in thread
From: Vijay Subramanian @ 2013-01-16 21:09 UTC (permalink / raw)
  To: Tom Herbert; +Cc: netdev, davem, netdev, eric.dumazet



> 	 * Unlike other sk lookup places we do not check
> @@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk,
> 		    (!sk->sk_bound_dev_if ||
> 		     !sk2->sk_bound_dev_if ||
> 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
> -			if (!reuse || !sk2->sk_reuse ||
> -			    sk2->sk_state == TCP_LISTEN) {
> +			if ((!reuse || !sk2->sk_reuse ||
> +			    sk2->sk_state == TCP_LISTEN) &&
> +			    (!reuseport || !sk2->sk_reuseport ||
> +			    (sk2->sk_state != TCP_TIME_WAIT &&
> +			     uid != sock_i_uid(sk2)))) {
> 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
> 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
> 				    sk2_rcv_saddr == sk_rcv_saddr(sk))


How about introducing some helper functions to make
inet_csk_bind_conflict() and inet6_csk_bind_conflict() more readable, as in
the patch below? We can add another test for reuseport() for soreuseport
patches.

udp.c already has ipv4_rcv_saddr_equal(), but it seems to call
ipv6_only_sock() and not inet_v6_ipv6only(), which is needed in
inet{6}_csk_bind_conflict(). So I added sk_rcv_saddr_equal().

Also the bind_conflict functions can return bool instead of int (not 
implemented in patch below).


If patch idea below is ok, I will send it officially.

Thanks,
Vijay


diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 1832927..c15d2eb 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -22,6 +22,8 @@

  #include <net/inet_sock.h>
  #include <net/request_sock.h>
+#include <net/tcp_states.h>
+#include <net/inet_timewait_sock.h>

  #define INET_CSK_DEBUG 1

@@ -205,6 +207,37 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
  #endif
  }

+/* The port cannot be reused if the older socket is in LISTEN state or if
+ * either the old or new one does not allow reuse
+ */
+static inline bool sk_reuse_equal(int reuse, const struct sock *sk2)
+{
+	return !reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN;
+}
+
+/* The port cannot be reused if both sockets are bound to the same device or
+ * if either one is not bound
+ */
+static inline bool sk_bound_dev_equal(const struct sock *sk,
+				      const struct sock *sk2)
+{
+	return !sk->sk_bound_dev_if || !sk2->sk_bound_dev_if ||
+	       sk->sk_bound_dev_if == sk2->sk_bound_dev_if;
+}
+
+/* The port cannot be reused if both sockets have the same rcv_saddr
+ * or if either rcv_saddr is NULL
+ */
+static inline bool sk_rcv_saddr_equal(const struct sock *sk1,
+				      const struct sock *sk2)
+{
+	__be32 sk1_rcv_saddr = sk_rcv_saddr(sk1);
+	__be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+
+	return !sk2_rcv_saddr || !sk1_rcv_saddr ||
+	       sk2_rcv_saddr == sk1_rcv_saddr;
+}
+
  /*
   *	Reset the retransmission timer
   */
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d0670f0..375cca3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -68,26 +68,15 @@ int inet_csk_bind_conflict(const struct sock *sk,
  	 */

  	sk_for_each_bound(sk2, node, &tb->owners) {
-		if (sk != sk2 &&
-		    !inet_v6_ipv6only(sk2) &&
-		    (!sk->sk_bound_dev_if ||
-		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
-			if (!reuse || !sk2->sk_reuse ||
-			    sk2->sk_state == TCP_LISTEN) {
-				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
-				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
-				    sk2_rcv_saddr == sk_rcv_saddr(sk))
-					break;
-			}
-			if (!relax && reuse && sk2->sk_reuse &&
-			    sk2->sk_state != TCP_LISTEN) {
-				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+		if (sk != sk2 && !inet_v6_ipv6only(sk2) &&
+		    sk_bound_dev_equal(sk, sk2)) {
+			if (sk_reuse_equal(reuse, sk2) &&
+			    sk_rcv_saddr_equal(sk, sk2))
+				break;

-				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
-				    sk2_rcv_saddr == sk_rcv_saddr(sk))
-					break;
-			}
+			if (!relax && sk_reuse_equal(reuse, sk2) &&
+			    sk_rcv_saddr_equal(sk, sk2))
+				break;
  		}
  	}
  	return node != NULL;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 3064785..8ebe20d 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -39,13 +39,9 @@ int inet6_csk_bind_conflict(const struct sock *sk,
  	 * vs net namespaces issues.
  	 */
  	sk_for_each_bound(sk2, node, &tb->owners) {
-		if (sk != sk2 &&
-		    (!sk->sk_bound_dev_if ||
-		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
-		    (!sk->sk_reuse || !sk2->sk_reuse ||
-		     sk2->sk_state == TCP_LISTEN) &&
-		     ipv6_rcv_saddr_equal(sk, sk2))
+		if (sk != sk2 && sk_bound_dev_equal(sk, sk2) &&
+		    sk_reuse_equal(sk->sk_reuse, sk2) &&
+		    ipv6_rcv_saddr_equal(sk, sk2))
  			break;
  	}

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/5] soreuseport: TCP/IPv4 implementation
  2013-01-14 20:00 Tom Herbert
@ 2013-01-14 20:20 ` YOSHIFUJI Hideaki
  2013-01-16 21:09 ` Vijay Subramanian
  1 sibling, 0 replies; 9+ messages in thread
From: YOSHIFUJI Hideaki @ 2013-01-14 20:20 UTC (permalink / raw)
  To: Tom Herbert; +Cc: netdev, davem, netdev, eric.dumazet

Tom Herbert wrote:
>  static inline struct sock *inet_lookup_listener(struct net *net,
>  		struct inet_hashinfo *hashinfo,
> +		__be32 saddr, __be16 sport,
>  		__be32 daddr, __be16 dport, int dif)
>  {
> -	return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif);
> +	return __inet_lookup_listener(net, hashinfo, saddr, sport,
> +	    daddr, ntohs(dport), dif);
>  }
>  
>  /* Socket demux engine toys. */
> @@ -358,7 +364,8 @@ static inline struct sock *__inet_lookup(struct net *net,
>  	struct sock *sk = __inet_lookup_established(net, hashinfo,
>  				saddr, sport, daddr, hnum, dif);
>  
> -	return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif);
> +	return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport,
> +	    daddr, hnum, dif);
>  }
>  

Please fix indent.

--yoshfuji

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/5] soreuseport: TCP/IPv4 implementation
@ 2013-01-14 20:00 Tom Herbert
  2013-01-14 20:20 ` YOSHIFUJI Hideaki
  2013-01-16 21:09 ` Vijay Subramanian
  0 siblings, 2 replies; 9+ messages in thread
From: Tom Herbert @ 2013-01-14 20:00 UTC (permalink / raw)
  To: netdev, davem; +Cc: netdev, eric.dumazet

Allow multiple listener sockets to bind to the same port.

Motivation for soreuseport would be something like a web server
binding to port 80 running with multiple threads, where each thread
might have its own listener socket.  This could be done as an
alternative to other models: 1) have one listener thread which
dispatches completed connections to workers. 2) accept on a single
listener socket from multiple threads.  In case #1 the listener thread
can easily become the bottleneck with high connection turn-over rate.
In case #2, the proportion of connections accepted per thread tends
to be uneven under high connection load (assuming simple event loop:
while (1) { accept(); process() }); wakeup does not promote fairness
among the sockets.  We have seen the disproportion to be as high
as a 3:1 ratio between the thread accepting the most connections and
the one accepting the fewest.  With so_reuseport the distribution is
uniform.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/net/inet_hashtables.h   |   13 ++++++++--
 net/ipv4/inet_connection_sock.c |   48 ++++++++++++++++++++++++++++++---------
 net/ipv4/inet_hashtables.c      |   28 +++++++++++++++++-----
 net/ipv4/tcp_ipv4.c             |    4 ++-
 4 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 67a8fa0..2968c8f 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -81,7 +81,9 @@ struct inet_bind_bucket {
 	struct net		*ib_net;
 #endif
 	unsigned short		port;
-	signed short		fastreuse;
+	signed char		fastreuse;
+	signed char		fastreuseport;
+	int			fastuid;
 	int			num_owners;
 	struct hlist_node	node;
 	struct hlist_head	owners;
@@ -257,15 +259,19 @@ extern void inet_unhash(struct sock *sk);
 
 extern struct sock *__inet_lookup_listener(struct net *net,
 					   struct inet_hashinfo *hashinfo,
+					   const __be32 saddr,
+					   const __be16 sport,
 					   const __be32 daddr,
 					   const unsigned short hnum,
 					   const int dif);
 
 static inline struct sock *inet_lookup_listener(struct net *net,
 		struct inet_hashinfo *hashinfo,
+		__be32 saddr, __be16 sport,
 		__be32 daddr, __be16 dport, int dif)
 {
-	return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif);
+	return __inet_lookup_listener(net, hashinfo, saddr, sport,
+	    daddr, ntohs(dport), dif);
 }
 
 /* Socket demux engine toys. */
@@ -358,7 +364,8 @@ static inline struct sock *__inet_lookup(struct net *net,
 	struct sock *sk = __inet_lookup_established(net, hashinfo,
 				saddr, sport, daddr, hnum, dif);
 
-	return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif);
+	return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport,
+	    daddr, hnum, dif);
 }
 
 static inline struct sock *inet_lookup(struct net *net,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d0670f0..c2ce445 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -59,6 +59,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
 	struct sock *sk2;
 	struct hlist_node *node;
 	int reuse = sk->sk_reuse;
+	int reuseport = sk->sk_reuseport;
+	int uid = sock_i_uid((struct sock *)sk);
 
 	/*
 	 * Unlike other sk lookup places we do not check
@@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk,
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
-			if (!reuse || !sk2->sk_reuse ||
-			    sk2->sk_state == TCP_LISTEN) {
+			if ((!reuse || !sk2->sk_reuse ||
+			    sk2->sk_state == TCP_LISTEN) &&
+			    (!reuseport || !sk2->sk_reuseport ||
+			    (sk2->sk_state != TCP_TIME_WAIT &&
+			     uid != sock_i_uid(sk2)))) {
 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
 				    sk2_rcv_saddr == sk_rcv_saddr(sk))
@@ -106,6 +111,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 	int ret, attempts = 5;
 	struct net *net = sock_net(sk);
 	int smallest_size = -1, smallest_rover;
+	int uid = sock_i_uid(sk);
 
 	local_bh_disable();
 	if (!snum) {
@@ -125,9 +131,12 @@ again:
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
 				if (net_eq(ib_net(tb), net) && tb->port == rover) {
-					if (tb->fastreuse > 0 &&
-					    sk->sk_reuse &&
-					    sk->sk_state != TCP_LISTEN &&
+					if (((tb->fastreuse > 0 &&
+					      sk->sk_reuse &&
+					      sk->sk_state != TCP_LISTEN) ||
+					     (tb->fastreuseport > 0 &&
+					      sk->sk_reuseport &&
+					      tb->fastuid == uid)) &&
 					    (tb->num_owners < smallest_size || smallest_size == -1)) {
 						smallest_size = tb->num_owners;
 						smallest_rover = rover;
@@ -185,14 +194,17 @@ tb_found:
 		if (sk->sk_reuse == SK_FORCE_REUSE)
 			goto success;
 
-		if (tb->fastreuse > 0 &&
-		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
+		if (((tb->fastreuse > 0 &&
+		      sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
+		     (tb->fastreuseport > 0 &&
+		      sk->sk_reuseport && tb->fastuid == uid)) &&
 		    smallest_size == -1) {
 			goto success;
 		} else {
 			ret = 1;
 			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
-				if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
+				if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
+				     (sk->sk_reuseport && tb->fastuid == uid)) &&
 				    smallest_size != -1 && --attempts >= 0) {
 					spin_unlock(&head->lock);
 					goto again;
@@ -212,9 +224,23 @@ tb_not_found:
 			tb->fastreuse = 1;
 		else
 			tb->fastreuse = 0;
-	} else if (tb->fastreuse &&
-		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
-		tb->fastreuse = 0;
+		if (sk->sk_reuseport) {
+			tb->fastreuseport = 1;
+			tb->fastuid = uid;
+		} else {
+			tb->fastreuseport = 0;
+			tb->fastuid = 0;
+		}
+	} else {
+		if (tb->fastreuse &&
+		    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
+			tb->fastreuse = 0;
+		if (tb->fastreuseport &&
+		    (!sk->sk_reuseport || tb->fastuid != uid)) {
+			tb->fastreuseport = 0;
+			tb->fastuid = 0;
+		}
+	}
 success:
 	if (!inet_csk(sk)->icsk_bind_hash)
 		inet_bind_hash(sk, tb, snum);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fa3ae81..491cb85 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -39,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 		write_pnet(&tb->ib_net, hold_net(net));
 		tb->port      = snum;
 		tb->fastreuse = 0;
+		tb->fastreuseport = 0;
 		tb->num_owners = 0;
 		INIT_HLIST_HEAD(&tb->owners);
 		hlist_add_head(&tb->node, &head->chain);
@@ -151,16 +152,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
 			!ipv6_only_sock(sk)) {
 		__be32 rcv_saddr = inet->inet_rcv_saddr;
-		score = sk->sk_family == PF_INET ? 1 : 0;
+		score = sk->sk_family == PF_INET ? 2 : 1;
 		if (rcv_saddr) {
 			if (rcv_saddr != daddr)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 		if (sk->sk_bound_dev_if) {
 			if (sk->sk_bound_dev_if != dif)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 	}
 	return score;
@@ -176,6 +177,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
 
 struct sock *__inet_lookup_listener(struct net *net,
 				    struct inet_hashinfo *hashinfo,
+				    const __be32 saddr, __be16 sport,
 				    const __be32 daddr, const unsigned short hnum,
 				    const int dif)
 {
@@ -183,17 +185,29 @@ struct sock *__inet_lookup_listener(struct net *net,
 	struct hlist_nulls_node *node;
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
-	int score, hiscore;
+	int score, hiscore, matches = 0, reuseport = 0;
+	u32 phash = 0;
 
 	rcu_read_lock();
 begin:
 	result = NULL;
-	hiscore = -1;
+	hiscore = 0;
 	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
 		score = compute_score(sk, net, hnum, daddr, dif);
 		if (score > hiscore) {
 			result = sk;
 			hiscore = score;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				phash = inet_ehashfn(net, daddr, hnum,
+				    saddr, sport);
+				matches = 1;
+			}
+		} else if (score == hiscore && reuseport) {
+			matches++;
+			if (((u64)phash * matches) >> 32 == 0)
+				result = sk;
+			phash = next_pseudo_random32(phash);
 		}
 	}
 	/*
@@ -501,7 +515,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
 				if (net_eq(ib_net(tb), net) &&
 				    tb->port == port) {
-					if (tb->fastreuse >= 0)
+					if (tb->fastreuse >= 0 ||
+					    tb->fastreuseport >= 0)
 						goto next_port;
 					WARN_ON(hlist_empty(&tb->owners));
 					if (!check_established(death_row, sk,
@@ -518,6 +533,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 				break;
 			}
 			tb->fastreuse = -1;
+			tb->fastreuseport = -1;
 			goto ok;
 
 		next_port:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c6ce9ca..bbbdcc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 		 * no RST generated if md5 hash doesn't match.
 		 */
 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
-					     &tcp_hashinfo, ip_hdr(skb)->daddr,
+					     &tcp_hashinfo, ip_hdr(skb)->saddr,
+					     th->source, ip_hdr(skb)->daddr,
 					     ntohs(th->source), inet_iif(skb));
 		/* don't send rst if it can't find key */
 		if (!sk1)
@@ -2074,6 +2075,7 @@ do_time_wait:
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
 							&tcp_hashinfo,
+							iph->saddr, th->source,
 							iph->daddr, th->dest,
 							inet_iif(skb));
 		if (sk2) {
-- 
1.7.7.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-01-26  3:40 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-22 19:50 [PATCH 2/5] soreuseport: TCP/IPv4 implementation Tom Herbert
2013-01-25  8:08 ` Steffen Klassert
2013-01-26  3:40   ` Tom Herbert
  -- strict thread matches above, loose matches on Subject: below --
2013-01-14 20:00 Tom Herbert
2013-01-14 20:20 ` YOSHIFUJI Hideaki
2013-01-16 21:09 ` Vijay Subramanian
2013-01-16 21:10   ` David Miller
2013-01-16 21:30   ` Tom Herbert
2013-01-16 22:14     ` Vijay Subramanian

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.