netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Weiping Pan <wpan@redhat.com>
To: netdev@vger.kernel.org
Cc: brutus@google.com, Weiping Pan <wpan@redhat.com>
Subject: [PATCH 3/3] delete request_sock->friend
Date: Wed,  5 Dec 2012 10:54:19 +0800	[thread overview]
Message-ID: <d19e6cf3f93f50fb57e89149eac1d0af9a04e153.1354674154.git.wpan@redhat.com> (raw)
In-Reply-To: <cover.1354674151.git.wpan@redhat.com>
In-Reply-To: <cover.1354674151.git.wpan@redhat.com>

The sock pointed by request_sock->friend may be freed since it does not have a
lock to protect it.
I just delete request_sock->friend since I think it is useless.

For sk_buff->friend, it has the same problem, and we use
"atomic_add(skb->truesize, &sk->sk_wmem_alloc)" to guarantee that the sock can
not be freed before the skb is freed.

Then for 3-way handshake with tcp friends enabled,
SYN->friend is NULL, SYN/ACK->friend is set in tcp_make_synack(),
and ACK->friend is set in tcp_send_ack().

Signed-off-by: Weiping Pan <wpan@redhat.com>
---
 include/net/inet_connection_sock.h |    4 ++
 include/net/request_sock.h         |    1 -
 net/ipv4/inet_connection_sock.c    |   58 +++++++++++++++++++++++------------
 net/ipv4/tcp_input.c               |   10 ------
 net/ipv4/tcp_ipv4.c                |    7 +++-
 net/ipv4/tcp_minisocks.c           |    7 ++++-
 net/ipv4/tcp_output.c              |   21 ++++++++-----
 net/ipv6/tcp_ipv6.c                |    1 -
 8 files changed, 66 insertions(+), 43 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ba1d361..883e029 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -147,6 +147,10 @@ static inline void *inet_csk_ca(const struct sock *sk)
 extern struct sock *inet_csk_clone_lock(const struct sock *sk,
 					const struct request_sock *req,
 					const gfp_t priority);
+extern struct sock *inet_csk_friend_clone_lock(const struct sock *sk,
+					const struct request_sock *req,
+					const struct sk_buff *skb,
+					const gfp_t priority);
 
 enum inet_csk_ack_state_t {
 	ICSK_ACK_SCHED	= 1,
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index c6dfa26..a51dbd1 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -66,7 +66,6 @@ struct request_sock {
 	unsigned long			expires;
 	const struct request_sock_ops	*rsk_ops;
 	struct sock			*sk;
-	struct sock			*friend;
 	u32				secid;
 	u32				peer_secid;
 };
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ce4b79b..7af92ed 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -659,26 +659,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 	if (newsk != NULL) {
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 
-		if (req->friend) {
-			/*
-			 * Make friends with the requestor but the ACK of
-			 * the request is already in-flight so the race is
-			 * on to make friends before the ACK is processed.
-			 * If the requestor's sk_friend value is != NULL
-			 * then the requestor has already processed the
-			 * ACK so indicate state change to wake'm up.
-			 */
-			struct sock *was;
-
-			sock_hold(req->friend);
-			newsk->sk_friend = req->friend;
-			sock_hold(newsk);
-			was = xchg(&req->friend->sk_friend, newsk);
-			/* If requester already connect()ed, maybe sleeping */
-			if (was && !sock_flag(req->friend, SOCK_DEAD))
-				sk->sk_state_change(req->friend);
-		}
-
 		newsk->sk_state = TCP_SYN_RECV;
 		newicsk->icsk_bind_hash = NULL;
 
@@ -700,6 +680,44 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
 
+/**
+ *	inet_csk_friend_clone_lock - clone an inet socket, and lock its clone
+ *	@sk: the socket to clone
+ *	@req: request_sock
+ *	@skb: who sends the request
+ *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *inet_csk_friend_clone_lock(const struct sock *sk,
+				 const struct request_sock *req,
+				 const struct sk_buff *skb,
+				 const gfp_t priority)
+{
+	struct sock *newsk = inet_csk_clone_lock(sk, req, priority);
+
+	if (newsk) {
+		struct sock *friend = skb->friend;
+		if (friend) {
+			/*
+			 * Make friends.
+			 */
+			struct sock *was;
+
+			sock_hold(friend);
+			newsk->sk_friend = friend;
+			sock_hold(newsk);
+			was = xchg(&friend->sk_friend, newsk);
+			/* If requester already connect()ed, maybe sleeping */
+			if (was && !sock_flag(friend, SOCK_DEAD))
+				sk->sk_state_change(friend);
+		}
+	}
+
+	return newsk;
+}
+EXPORT_SYMBOL_GPL(inet_csk_friend_clone_lock);
+
 /*
  * At this point, there should be no process reference to this
  * socket, and thus no user references at all.  Therefore we
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9640a81..39db09d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5726,16 +5726,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 *    state to ESTABLISHED..."
 		 */
 
-		if (skb->friend) {
-			/*
-			 * If friends haven't been made yet, our sk_friend
-			 * still == NULL, then update with the ACK's friend
-			 * value (the listen()er's sock addr) which is used
-			 * as a place holder.
-			 */
-			cmpxchg(&sk->sk_friend, NULL, skb->friend);
-		}
-
 		TCP_ECN_rcv_synack(tp, th);
 
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f494914..8d61e4c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1512,8 +1512,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
 #endif
 
-	req->friend = skb->friend;
-
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
@@ -1873,6 +1871,11 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
 		goto csum_err;
 
+	if (sysctl_tcp_friends && skb->friend) {
+		skb->sk = skb->friend;
+		skb->destructor = sock_wfree;
+	}
+
 	if (sk->sk_state == TCP_LISTEN) {
 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
 		if (!nsk)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 36d832a..753126e 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -383,7 +383,12 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
  */
 struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
 {
-	struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
+	struct sock *newsk = NULL;
+
+	if (sysctl_tcp_friends && skb->friend)
+		newsk = inet_csk_friend_clone_lock(sk, req, skb, GFP_ATOMIC);
+	else
+		newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
 
 	if (newsk != NULL) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 509c5e3..4d71549 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1028,13 +1028,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	tcb = TCP_SKB_CB(skb);
 	memset(&opts, 0, sizeof(opts));
 
-	if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
-		/* Only try to make friends if enabled */
-		if (sysctl_tcp_friends)
-			skb->friend = sk;
-
+	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
 		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
-	} else
+	else
 		tcp_options_size = tcp_established_options(sk, skb, &opts,
 							   &md5);
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
@@ -1050,7 +1046,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	skb_orphan(skb);
 	skb->sk = sk;
-	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
+
+	if (skb->friend)
+		skb->destructor = NULL;
+	else
+		skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
 			  tcp_wfree : sock_wfree;
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
@@ -2734,8 +2734,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	memset(&opts, 0, sizeof(opts));
 
 	/* Only try to make friends if enabled */
-	if (sysctl_tcp_friends)
+	if (sysctl_tcp_friends) {
 		skb->friend = sk;
+		atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+	}
 
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
@@ -3120,6 +3122,9 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Send it off, this clears delayed acks for us. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
+
+	if (sysctl_tcp_friends)
+		buff->friend = sk;
 	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
 }
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 828d5f7..6565cf5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -969,7 +969,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
 #endif
 
-	req->friend = skb->friend;
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-- 
1.7.4.4

  parent reply	other threads:[~2012-12-05  2:54 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-18 10:19 Fwd: Re: [PATCH v3] net-tcp: TCP/IP stack bypass for loopback connections Weiping Pan
2012-10-18 12:23 ` Bruce Curtis
2012-12-05  2:54   ` [RFC PATCH net-next 0/3 V4] " Weiping Pan
2012-12-05  2:54     ` [PATCH 1/3] Bruce's orignal tcp friend V3 Weiping Pan
2012-12-05  2:54     ` [PATCH 2/3] fix panic in tcp_close() Weiping Pan
2012-12-05  2:54     ` Weiping Pan [this message]
2012-12-10 21:02     ` [RFC PATCH net-next 0/3 V4] net-tcp: TCP/IP stack bypass for loopback connections David Miller
2012-12-12 14:13       ` Weiping Pan
     [not found]       ` <117a10f9575d95d6a9ea4602ea7376e2b6d5ccd1.1355320533.git.wpan@redhat.com>
2012-12-12 14:29         ` [RFC PATCH net-next 4/4 V4] try to fix performance regression Weiping Pan
2012-12-12 14:57           ` David Laight
2012-12-13 14:05             ` Weiping Pan
2012-12-13 18:25               ` Rick Jones
2012-12-14  5:53                 ` Weiping Pan
2012-12-12 16:25           ` Eric Dumazet
2012-12-13 14:09             ` Weiping Pan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d19e6cf3f93f50fb57e89149eac1d0af9a04e153.1354674154.git.wpan@redhat.com \
    --to=wpan@redhat.com \
    --cc=brutus@google.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).