linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mao Wenan <maowenan@huawei.com>
To: <davem@davemloft.net>
Cc: <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<edumazet@google.com>, Mao Wenan <maowenan@huawei.com>
Subject: [PATCH net v2] tcp: avoid creating multiple req socks with the same tuples
Date: Wed, 12 Jun 2019 11:57:15 +0800	[thread overview]
Message-ID: <20190612035715.166676-1-maowenan@huawei.com> (raw)

This issue occurs with bonding mode BOND_MODE_BROADCAST when the
two slaves have different affinity, so that their packets are handled
by different CPUs. These are the two pre-conditions in this case.

When the two slaves receive the same SYN packet at the same time,
two request socks (reqsk) will be created if the following happens:
1. syn1 arrives in tcp_conn_request(), which creates reqsk1 but has not
yet called inet_csk_reqsk_queue_hash_add().
2. syn2 arrives in tcp_v4_rcv(), goes on to tcp_conn_request() and creates
reqsk2, because __inet_lookup_skb() cannot find reqsk1.

Then reqsk1 and reqsk2 are both added to the established hash table, and
two SYN-ACKs with different sequence numbers (seq1 and seq2) are sent to
the client. When the client's ACK arrives, it is processed in tcp_v4_rcv()
and tcp_check_req(). If __inet_lookup_skb() finds reqsk2 while the ack_seq
of the ACK packet corresponds to seq1, the following check fails:
TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1
and a TCP RST is then sent to the client, closing the connection.

To fix this, call __inet_lookup_established() before __sk_nulls_add_node_rcu()
in inet_ehash_insert(). If a reqsk with the same tuple already exists in the
established hash table, free the current reqsk (reqsk2) directly and do not
send a SYN-ACK to the client.

Signed-off-by: Mao Wenan <maowenan@huawei.com>
---
 v2: move __inet_lookup_established from tcp_conn_request() to inet_ehash_insert()
 as Eric suggested.
---
 include/net/inet_connection_sock.h |  2 +-
 net/ipv4/inet_connection_sock.c    | 16 ++++++++++++----
 net/ipv4/inet_hashtables.c         | 13 +++++++++++++
 net/ipv4/tcp_input.c               |  7 ++++---
 4 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c57d53e7e02c..2d3538e333cb 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
 				      struct request_sock *req,
 				      struct sock *child);
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 				   unsigned long timeout);
 struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
 					 struct request_sock *req,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 13ec7c3a9c49..fd45ed2fd985 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -749,7 +749,7 @@ static void reqsk_timer_handler(struct timer_list *t)
 	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
 }
 
-static void reqsk_queue_hash_req(struct request_sock *req,
+static bool reqsk_queue_hash_req(struct request_sock *req,
 				 unsigned long timeout)
 {
 	req->num_retrans = 0;
@@ -759,19 +759,27 @@ static void reqsk_queue_hash_req(struct request_sock *req,
 	timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
 	mod_timer(&req->rsk_timer, jiffies + timeout);
 
-	inet_ehash_insert(req_to_sk(req), NULL);
+	if (!inet_ehash_insert(req_to_sk(req), NULL)) {
+		if (timer_pending(&req->rsk_timer))
+			del_timer_sync(&req->rsk_timer);
+		return false;
+	}
 	/* before letting lookups find us, make sure all req fields
 	 * are committed to memory and refcnt initialized.
 	 */
 	smp_wmb();
 	refcount_set(&req->rsk_refcnt, 2 + 1);
+	return true;
 }
 
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 				   unsigned long timeout)
 {
-	reqsk_queue_hash_req(req, timeout);
+	if (!reqsk_queue_hash_req(req, timeout))
+		return false;
+
 	inet_csk_reqsk_queue_added(sk);
+	return true;
 }
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c4503073248b..b6a1b5334565 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -477,6 +477,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
 	struct inet_ehash_bucket *head;
 	spinlock_t *lock;
 	bool ret = true;
+	struct sock *reqsk = NULL;
 
 	WARN_ON_ONCE(!sk_unhashed(sk));
 
@@ -486,6 +487,18 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
 	spin_lock(lock);
+	if (!osk)
+		reqsk = __inet_lookup_established(sock_net(sk), &tcp_hashinfo,
+							sk->sk_daddr, sk->sk_dport,
+							sk->sk_rcv_saddr, sk->sk_num,
+							sk->sk_bound_dev_if, sk->sk_bound_dev_if);
+	if (unlikely(reqsk)) {
+		ret = false;
+		reqsk_free(inet_reqsk(sk));
+		spin_unlock(lock);
+		return ret;
+	}
+
 	if (osk) {
 		WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
 		ret = sk_nulls_del_node_init_rcu(osk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38dfc308c0fb..358272394590 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6570,9 +6570,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		sock_put(fastopen_sk);
 	} else {
 		tcp_rsk(req)->tfo_listener = false;
-		if (!want_cookie)
-			inet_csk_reqsk_queue_hash_add(sk, req,
-				tcp_timeout_init((struct sock *)req));
+		if (!want_cookie && !inet_csk_reqsk_queue_hash_add(sk, req,
+					tcp_timeout_init((struct sock *)req)))
+			return 0;
+
 		af_ops->send_synack(sk, dst, &fl, req, &foc,
 				    !want_cookie ? TCP_SYNACK_NORMAL :
 						   TCP_SYNACK_COOKIE);
-- 
2.20.1


             reply	other threads:[~2019-06-12  3:49 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-12  3:57 Mao Wenan [this message]
2019-06-12 16:25 ` [PATCH net v2] tcp: avoid creating multiple req socks with the same tuples David Miller
2019-06-13  6:34   ` maowenan
2019-06-12 16:30 ` Eric Dumazet
2019-06-13  4:21   ` maowenan
2019-06-14  4:19     ` maowenan
2019-06-14  4:28       ` Eric Dumazet
2019-06-14  9:35         ` maowenan
2019-06-14 12:27           ` Eric Dumazet
2019-06-14 14:03             ` maowenan
2019-06-14 14:25               ` Eric Dumazet
2019-06-14 14:34                 ` Eric Dumazet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190612035715.166676-1-maowenan@huawei.com \
    --to=maowenan@huawei.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).