netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] mptcp: simplify mptcp_accept()
@ 2020-03-12 15:13 Paolo Abeni
  2020-03-12 15:13 ` [PATCH net-next 1/2] mptcp: create msk early Paolo Abeni
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Paolo Abeni @ 2020-03-12 15:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, Mat Martineau

Currently we allocate the MPTCP master socket at accept time.

The above makes mptcp_accept() quite complex, and requires checks is several
places for NULL MPTCP master socket.

These series simplify the MPTCP accept implementation, moving the master socket
allocation at syn-ack time, so that we drop unneeded checks with the follow-up
patch.

Note: patch 2/2 will conflict with net commit 2398e3991bda ("mptcp: always 
include dack if possible."). If the following will help, I can send a rebased
version of the series after that net is merged back into net-next.

Paolo Abeni (2):
  mptcp: create msk early
  mptcp: drop unneeded checks

 net/mptcp/options.c  |  2 +-
 net/mptcp/protocol.c | 83 ++++++++++++++++++++++++--------------------
 net/mptcp/protocol.h |  4 +--
 net/mptcp/subflow.c  | 50 ++++++++++++++------------
 net/mptcp/token.c    | 31 ++---------------
 5 files changed, 78 insertions(+), 92 deletions(-)

-- 
2.21.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH net-next 1/2] mptcp: create msk early
  2020-03-12 15:13 [PATCH net-next 0/2] mptcp: simplify mptcp_accept() Paolo Abeni
@ 2020-03-12 15:13 ` Paolo Abeni
  2020-03-12 15:13 ` [PATCH net-next 2/2] mptcp: drop unneeded checks Paolo Abeni
  2020-03-13 14:09 ` [PATCH net-next 0/2] mptcp: simplify mptcp_accept() Paolo Abeni
  2 siblings, 0 replies; 4+ messages in thread
From: Paolo Abeni @ 2020-03-12 15:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, Mat Martineau

This change move the mptcp socket allocation from mptcp_accept() to
subflow_syn_recv_sock(), so that subflow->conn is now always set
for the non fallback scenario.

It allows cleaning up a bit mptcp_accept() reducing the additional
locking and will allow fourther cleanup in later patch

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 83 ++++++++++++++++++++++++--------------------
 net/mptcp/protocol.h |  4 +--
 net/mptcp/subflow.c  | 32 +++++++++++------
 net/mptcp/token.c    | 31 ++---------------
 4 files changed, 70 insertions(+), 80 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c0cef07f4382..04c3caed92df 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -820,9 +820,12 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
 }
 #endif
 
-static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
+struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
 {
+	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
 	struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+	struct mptcp_sock *msk;
+	u64 ack_seq;
 
 	if (!nsk)
 		return NULL;
@@ -832,6 +835,36 @@ static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
 		inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
 #endif
 
+	__mptcp_init_sock(nsk);
+
+	msk = mptcp_sk(nsk);
+	msk->local_key = subflow_req->local_key;
+	msk->token = subflow_req->token;
+	msk->subflow = NULL;
+
+	if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) {
+		bh_unlock_sock(nsk);
+
+		/* we can't call into mptcp_close() here - possible BH context
+		 * free the sock directly
+		 */
+		nsk->sk_prot->destroy(nsk);
+		sk_free(nsk);
+		return NULL;
+	}
+
+	msk->write_seq = subflow_req->idsn + 1;
+	if (subflow_req->remote_key_valid) {
+		msk->can_ack = true;
+		msk->remote_key = subflow_req->remote_key;
+		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
+		ack_seq++;
+		msk->ack_seq = ack_seq;
+	}
+	bh_unlock_sock(nsk);
+
+	/* keep a single reference */
+	__sock_put(nsk);
 	return nsk;
 }
 
@@ -859,40 +892,26 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
 		struct mptcp_subflow_context *subflow;
 		struct sock *new_mptcp_sock;
 		struct sock *ssk = newsk;
-		u64 ack_seq;
 
 		subflow = mptcp_subflow_ctx(newsk);
-		lock_sock(sk);
+		new_mptcp_sock = subflow->conn;
 
-		local_bh_disable();
-		new_mptcp_sock = mptcp_sk_clone_lock(sk);
-		if (!new_mptcp_sock) {
-			*err = -ENOBUFS;
-			local_bh_enable();
-			release_sock(sk);
-			mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1, 0, 0);
-			tcp_close(newsk, 0);
-			return NULL;
+		/* is_mptcp should be false if subflow->conn is missing, see
+		 * subflow_syn_recv_sock()
+		 */
+		if (WARN_ON_ONCE(!new_mptcp_sock)) {
+			tcp_sk(newsk)->is_mptcp = 0;
+			return newsk;
 		}
 
-		__mptcp_init_sock(new_mptcp_sock);
+		/* acquire the 2nd reference for the owning socket */
+		sock_hold(new_mptcp_sock);
 
+		local_bh_disable();
+		bh_lock_sock(new_mptcp_sock);
 		msk = mptcp_sk(new_mptcp_sock);
-		msk->local_key = subflow->local_key;
-		msk->token = subflow->token;
-		msk->subflow = NULL;
 		msk->first = newsk;
 
-		mptcp_token_update_accept(newsk, new_mptcp_sock);
-
-		msk->write_seq = subflow->idsn + 1;
-		if (subflow->can_ack) {
-			msk->can_ack = true;
-			msk->remote_key = subflow->remote_key;
-			mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
-			ack_seq++;
-			msk->ack_seq = ack_seq;
-		}
 		newsk = new_mptcp_sock;
 		mptcp_copy_inaddrs(newsk, ssk);
 		list_add(&subflow->node, &msk->conn_list);
@@ -903,18 +922,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
 		inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV);
 		bh_unlock_sock(new_mptcp_sock);
 		local_bh_enable();
-		release_sock(sk);
-
-		/* the subflow can already receive packet, avoid racing with
-		 * the receive path and process the pending ones
-		 */
-		lock_sock(ssk);
-		subflow->rel_write_seq = 1;
-		subflow->tcp_sock = ssk;
-		subflow->conn = new_mptcp_sock;
-		if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue)))
-			mptcp_subflow_data_available(ssk);
-		release_sock(ssk);
 	}
 
 	return newsk;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 313558fa8185..9baf6fcba914 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -193,6 +193,7 @@ void mptcp_proto_init(void);
 int mptcp_proto_v6_init(void);
 #endif
 
+struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
 void mptcp_get_options(const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx);
 
@@ -202,8 +203,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk);
 int mptcp_token_new_request(struct request_sock *req);
 void mptcp_token_destroy_request(u32 token);
 int mptcp_token_new_connect(struct sock *sk);
-int mptcp_token_new_accept(u32 token);
-void mptcp_token_update_accept(struct sock *sk, struct sock *conn);
+int mptcp_token_new_accept(u32 token, struct sock *conn);
 void mptcp_token_destroy(u32 token);
 
 void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 0de2a44bdaa0..047b088e4617 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -182,6 +182,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
 	struct mptcp_subflow_request_sock *subflow_req;
 	struct tcp_options_received opt_rx;
+	struct sock *new_msk = NULL;
 	struct sock *child;
 
 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -197,7 +198,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			 * out-of-order pkt, which will not carry the MP_CAPABLE
 			 * opt even on mptcp enabled paths
 			 */
-			goto create_child;
+			goto create_msk;
 		}
 
 		opt_rx.mptcp.mp_capable = 0;
@@ -207,7 +208,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			subflow_req->remote_key_valid = 1;
 		} else {
 			subflow_req->mp_capable = 0;
+			goto create_child;
 		}
+
+create_msk:
+		new_msk = mptcp_sk_clone(listener->conn, req);
+		if (!new_msk)
+			subflow_req->mp_capable = 0;
 	}
 
 create_child:
@@ -221,22 +228,22 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 		 * handshake
 		 */
 		if (!ctx)
-			return child;
+			goto out;
 
 		if (ctx->mp_capable) {
-			if (mptcp_token_new_accept(ctx->token))
-				goto close_child;
+			/* new mpc subflow takes ownership of the newly
+			 * created mptcp socket
+			 */
+			ctx->conn = new_msk;
+			new_msk = NULL;
 		}
 	}
 
+out:
+	/* dispose of the left over mptcp master, if any */
+	if (unlikely(new_msk))
+		sock_put(new_msk);
 	return child;
-
-close_child:
-	pr_debug("closing child socket");
-	tcp_send_active_reset(child, GFP_ATOMIC);
-	inet_csk_prepare_forced_close(child);
-	tcp_done(child);
-	return NULL;
 }
 
 static struct inet_connection_sock_af_ops subflow_specific;
@@ -793,6 +800,9 @@ static void subflow_ulp_clone(const struct request_sock *req,
 	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
 	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
 	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
+	new_ctx->rel_write_seq = 1;
+	new_ctx->tcp_sock = newsk;
+
 	new_ctx->mp_capable = 1;
 	new_ctx->fourth_ack = subflow_req->remote_key_valid;
 	new_ctx->can_ack = subflow_req->remote_key_valid;
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 84d887806090..b71b53c0ac8d 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -128,45 +128,18 @@ int mptcp_token_new_connect(struct sock *sk)
  *
  * Called when a SYN packet creates a new logical connection, i.e.
  * is not a join request.
- *
- * We don't have an mptcp socket yet at that point.
- * This is paired with mptcp_token_update_accept, called on accept().
  */
-int mptcp_token_new_accept(u32 token)
+int mptcp_token_new_accept(u32 token, struct sock *conn)
 {
 	int err;
 
 	spin_lock_bh(&token_tree_lock);
-	err = radix_tree_insert(&token_tree, token, &token_used);
+	err = radix_tree_insert(&token_tree, token, conn);
 	spin_unlock_bh(&token_tree_lock);
 
 	return err;
 }
 
-/**
- * mptcp_token_update_accept - update token to map to mptcp socket
- * @conn: the new struct mptcp_sock
- * @sk: the initial subflow for this mptcp socket
- *
- * Called when the first mptcp socket is created on accept to
- * refresh the dummy mapping (done to reserve the token) with
- * the mptcp_socket structure that wasn't allocated before.
- */
-void mptcp_token_update_accept(struct sock *sk, struct sock *conn)
-{
-	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-	void __rcu **slot;
-
-	spin_lock_bh(&token_tree_lock);
-	slot = radix_tree_lookup_slot(&token_tree, subflow->token);
-	WARN_ON_ONCE(!slot);
-	if (slot) {
-		WARN_ON_ONCE(rcu_access_pointer(*slot) != &token_used);
-		radix_tree_replace_slot(&token_tree, slot, conn);
-	}
-	spin_unlock_bh(&token_tree_lock);
-}
-
 /**
  * mptcp_token_destroy_request - remove mptcp connection/token
  * @token - token of mptcp connection to remove
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH net-next 2/2] mptcp: drop unneeded checks
  2020-03-12 15:13 [PATCH net-next 0/2] mptcp: simplify mptcp_accept() Paolo Abeni
  2020-03-12 15:13 ` [PATCH net-next 1/2] mptcp: create msk early Paolo Abeni
@ 2020-03-12 15:13 ` Paolo Abeni
  2020-03-13 14:09 ` [PATCH net-next 0/2] mptcp: simplify mptcp_accept() Paolo Abeni
  2 siblings, 0 replies; 4+ messages in thread
From: Paolo Abeni @ 2020-03-12 15:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, Mat Martineau

After the previous patch subflow->conn is always != NULL and
is never changed. We can drop a bunch of now unneeded checks

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/options.c |  2 +-
 net/mptcp/subflow.c | 18 +++++++-----------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b9a8305bd934..eb6cd0accf99 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -362,7 +362,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 
 	opts->ext_copy.use_ack = 0;
 	msk = mptcp_sk(subflow->conn);
-	if (!msk || !READ_ONCE(msk->can_ack)) {
+	if (!READ_ONCE(msk->can_ack)) {
 		*size = ALIGN(dss_size, 4);
 		return ret;
 	}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 047b088e4617..8434c7f5f712 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -112,7 +112,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 
 	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
 
-	if (subflow->conn && !subflow->conn_finished) {
+	if (!subflow->conn_finished) {
 		pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
 			 subflow->remote_key);
 		mptcp_finish_connect(sk);
@@ -439,9 +439,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
 	if (subflow->data_avail)
 		return true;
 
-	if (!subflow->conn)
-		return false;
-
 	msk = mptcp_sk(subflow->conn);
 	for (;;) {
 		u32 map_remaining;
@@ -561,11 +558,10 @@ static void subflow_data_ready(struct sock *sk)
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 	struct sock *parent = subflow->conn;
 
-	if (!parent || !subflow->mp_capable) {
+	if (!subflow->mp_capable) {
 		subflow->tcp_data_ready(sk);
 
-		if (parent)
-			parent->sk_data_ready(parent);
+		parent->sk_data_ready(parent);
 		return;
 	}
 
@@ -579,7 +575,7 @@ static void subflow_write_space(struct sock *sk)
 	struct sock *parent = subflow->conn;
 
 	sk_stream_write_space(sk);
-	if (parent && sk_stream_is_writeable(sk)) {
+	if (sk_stream_is_writeable(sk)) {
 		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
 		smp_mb__after_atomic();
 		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
@@ -694,7 +690,7 @@ static bool subflow_is_done(const struct sock *sk)
 static void subflow_state_change(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-	struct sock *parent = READ_ONCE(subflow->conn);
+	struct sock *parent = subflow->conn;
 
 	__subflow_state_change(sk);
 
@@ -702,10 +698,10 @@ static void subflow_state_change(struct sock *sk)
 	 * a fin packet carrying a DSS can be unnoticed if we don't trigger
 	 * the data available machinery here.
 	 */
-	if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk))
+	if (subflow->mp_capable && mptcp_subflow_data_available(sk))
 		mptcp_data_ready(parent, sk);
 
-	if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) &&
+	if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
 	    !subflow->rx_eof && subflow_is_done(sk)) {
 		subflow->rx_eof = 1;
 		parent->sk_shutdown |= RCV_SHUTDOWN;
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next 0/2] mptcp: simplify mptcp_accept()
  2020-03-12 15:13 [PATCH net-next 0/2] mptcp: simplify mptcp_accept() Paolo Abeni
  2020-03-12 15:13 ` [PATCH net-next 1/2] mptcp: create msk early Paolo Abeni
  2020-03-12 15:13 ` [PATCH net-next 2/2] mptcp: drop unneeded checks Paolo Abeni
@ 2020-03-13 14:09 ` Paolo Abeni
  2 siblings, 0 replies; 4+ messages in thread
From: Paolo Abeni @ 2020-03-13 14:09 UTC (permalink / raw)
  To: netdev; +Cc: davem, Mat Martineau

On Thu, 2020-03-12 at 16:13 +0100, Paolo Abeni wrote:
> Currently we allocate the MPTCP master socket at accept time.
> 
> The above makes mptcp_accept() quite complex, and requires checks is several
> places for NULL MPTCP master socket.
> 
> These series simplify the MPTCP accept implementation, moving the master socket
> allocation at syn-ack time, so that we drop unneeded checks with the follow-up
> patch.
> 
> Note: patch 2/2 will conflict with net commit 2398e3991bda ("mptcp: always 
> include dack if possible."). If the following will help, I can send a rebased
> version of the series after that net is merged back into net-next.

Since the above commit is now on net-next I'll rebase and submit a v2.

Thanks,

Paolo


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-03-13 14:09 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-12 15:13 [PATCH net-next 0/2] mptcp: simplify mptcp_accept() Paolo Abeni
2020-03-12 15:13 ` [PATCH net-next 1/2] mptcp: create msk early Paolo Abeni
2020-03-12 15:13 ` [PATCH net-next 2/2] mptcp: drop unneeded checks Paolo Abeni
2020-03-13 14:09 ` [PATCH net-next 0/2] mptcp: simplify mptcp_accept() Paolo Abeni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).