All of lore.kernel.org
 help / color / mirror / Atom feed
* [MPTCP] [PATCH v1 4/4] mptcp: avoid acquiring the msk lock in mptcp_finisch_connect()
@ 2019-12-09 23:52 Paolo Abeni
  0 siblings, 0 replies; only message in thread
From: Paolo Abeni @ 2019-12-09 23:52 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 12402 bytes --]

Move the mptcp_copy_inaddrs() into mptcp_finish_connect(), so that
the mks lock is no longer required. Just document the unlocked access
with WRITE_ONCE().

Since we now always acquire msk -> ssk lock in order, we can also
avoid the msk lock/subflow lookup()/get()/ msk unlock()/ subflow op/ subflow put
dance in most ops - simply keep the msk lock held the whole time.

Additionally we can simplify mptcp_getname(), as now the address information
are always available in the msk.

RFC -> v1:
 - move mptcp_copy_inaddrs() into finish_connect()

Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
 net/mptcp/protocol.c | 211 +++++++++++++++++--------------------------
 1 file changed, 83 insertions(+), 128 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a11352e5e322..70090366adae 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -36,10 +36,10 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
 }
 
 /* if msk has a single subflow, and the mp_capable handshake is failed,
- * get a reference and return it.
+ * return it.
  * Otherwise returns NULL
  */
-static struct socket *__mptcp_tcp_fallback_get(const struct mptcp_sock *msk)
+static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk)
 {
 	struct socket *ssock = __mptcp_nmpc_socket(msk);
 
@@ -48,7 +48,6 @@ static struct socket *__mptcp_tcp_fallback_get(const struct mptcp_sock *msk)
 	if (!ssock || tcp_sk(ssock->sk)->is_mptcp)
 		return NULL;
 
-	sock_hold(ssock->sk);
 	return ssock;
 }
 
@@ -70,28 +69,23 @@ static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
 	return ((struct sock *)msk)->sk_state == TCP_CLOSE;
 }
 
-static struct socket *mptcp_socket_create_get(struct mptcp_sock *msk, int state)
+static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
 {
 	struct mptcp_subflow_context *subflow;
 	struct sock *sk = (struct sock *)msk;
 	struct socket *ssock;
 	int err;
 
-	lock_sock(sk);
 	ssock = __mptcp_nmpc_socket(msk);
 	if (ssock)
 		goto set_state;
 
-	if (!__mptcp_can_create_subflow(msk)) {
-		ssock = ERR_PTR(-EINVAL);
-		goto release;
-	}
+	if (!__mptcp_can_create_subflow(msk))
+		return ERR_PTR(-EINVAL);
 
 	err = mptcp_subflow_create_socket(sk, &ssock);
-	if (err) {
-		ssock = ERR_PTR(err);
-		goto release;
-	}
+	if (err)
+		return ERR_PTR(err);
 
 	msk->subflow = ssock;
 	subflow = mptcp_subflow_ctx(ssock->sk);
@@ -100,12 +94,8 @@ static struct socket *mptcp_socket_create_get(struct mptcp_sock *msk, int state)
 	subflow->request_version = 1; /* only v1 supported */
 
 set_state:
-	sock_hold(ssock->sk);
 	if (state != TCP_MAX_STATES)
 		inet_sk_state_store(sk, state);
-
-release:
-	release_sock(sk);
 	return ssock;
 }
 
@@ -273,12 +263,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		return -EOPNOTSUPP;
 
 	lock_sock(sk);
-	ssock = __mptcp_tcp_fallback_get(msk);
+	ssock = __mptcp_tcp_fallback(msk);
 	if (ssock) {
-		release_sock(sk);
 		pr_debug("fallback passthrough");
 		ret = sock_sendmsg(ssock, msg);
-		sock_put(ssock->sk);
+		release_sock(sk);
 		return ret;
 	}
 
@@ -376,13 +365,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	long timeo;
 
 	lock_sock(sk);
-	ssock = __mptcp_tcp_fallback_get(msk);
+	ssock = __mptcp_tcp_fallback(msk);
 	if (ssock) {
-		release_sock(sk);
 		pr_debug("fallback-read subflow=%p",
 			 mptcp_subflow_ctx(ssock->sk));
 		copied = sock_recvmsg(ssock, msg, flags);
-		sock_put(ssock->sk);
+		release_sock(sk);
 		return copied;
 	}
 
@@ -708,25 +696,26 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	char __kernel *optval;
+	int ret = -EOPNOTSUPP;
 	struct socket *ssock;
-	int ret;
 
 	/* will be treated as __user in tcp_setsockopt */
 	optval = (char __kernel __force *)uoptval;
 
 	pr_debug("msk=%p", msk);
-	ssock = mptcp_socket_create_get(msk, TCP_MAX_STATES);
+
+	/* @@ the meaning of setsockopt() when the socket is connected and
+	 * there are multiple subflows is not defined.
+	 */
+	lock_sock(sk);
+	ssock = __mptcp_socket_create(msk, TCP_MAX_STATES);
 	if (!IS_ERR(ssock)) {
 		pr_debug("subflow=%p", ssock->sk);
 		ret = kernel_setsockopt(ssock, level, optname, optval, optlen);
-		sock_put(ssock->sk);
-		return ret;
 	}
+	release_sock(sk);
 
-	/* @@ the meaning of setsockopt() when the socket is connected and
-	 * there are multiple subflows is not defined.
-	 */
-	return -EOPNOTSUPP;
+	return ret;
 }
 
 static int mptcp_getsockopt(struct sock *sk, int level, int optname,
@@ -734,27 +723,28 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	char __kernel *optval;
+	int ret = -EOPNOTSUPP;
 	int __kernel *option;
 	struct socket *ssock;
-	int ret;
 
 	/* will be treated as __user in tcp_getsockopt */
 	optval = (char __kernel __force *)uoptval;
 	option = (int __kernel __force *)uoption;
 
 	pr_debug("msk=%p", msk);
-	ssock = mptcp_socket_create_get(msk, TCP_MAX_STATES);
+	lock_sock(sk);
+
+	/* @@ the meaning of getsockopt() when the socket is connected and
+	 * there are multiple subflows is not defined.
+	 */
+	ssock = __mptcp_socket_create(msk, TCP_MAX_STATES);
 	if (!IS_ERR(ssock)) {
 		pr_debug("subflow=%p", ssock->sk);
 		ret = kernel_getsockopt(ssock, level, optname, optval, option);
-		sock_put(ssock->sk);
-		return ret;
 	}
+	release_sock(sk);
 
-	/* @@ the meaning of getsockopt() when the socket is connected and
-	 * there are multiple subflows is not defined.
-	 */
-	return -EOPNOTSUPP;
+	return ret;
 }
 
 static int mptcp_get_port(struct sock *sk, unsigned short snum)
@@ -773,57 +763,34 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
 void mptcp_finish_connect(struct sock *ssk)
 {
 	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk;
 	struct sock *sk;
+	u64 ack_seq;
 
 	subflow = mptcp_subflow_ctx(ssk);
-	sk = subflow->conn;
-
-	if (subflow->mp_capable) {
-		struct mptcp_sock *msk = mptcp_sk(sk);
-		u64 ack_seq;
+	if (!subflow->mp_capable)
+		return;
 
-		/* sk (new subflow socket) is already locked, but we need
-		 * to lock the parent (mptcp) socket now to add the tcp socket
-		 * to the subflow list.
-		 *
-		 * From lockdep point of view, this creates an ABBA type
-		 * deadlock: Normally (sendmsg, recvmsg, ..), we lock the mptcp
-		 * socket, then acquire a subflow lock.
-		 * Here we do the reverse: "subflow lock, then mptcp lock".
-		 *
-		 * Its alright to do this here, because this subflow is not yet
-		 * on the mptcp sockets subflow list.
-		 *
-		 * IOW, if another CPU has this mptcp socket locked, it cannot
-		 * acquire this particular subflow, because subflow->sk isn't
-		 * on msk->conn_list.
-		 *
-		 * This function can be called either from backlog processing
-		 * (BH will be enabled) or from softirq, so we need to use BH
-		 * locking scheme.
-		 */
-		local_bh_disable();
-		bh_lock_sock_nested(sk);
+	sk = subflow->conn;
+	msk = mptcp_sk(sk);
 
-		msk->remote_key = subflow->remote_key;
-		msk->local_key = subflow->local_key;
-		msk->token = subflow->token;
-		mptcp_copy_inaddrs(sk, ssk);
+	pr_debug("msk=%p, token=%u", sk, subflow->token);
 
-		pr_debug("msk=%p, token=%u", msk, msk->token);
+	mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
+	ack_seq++;
+	subflow->map_seq = ack_seq;
+	subflow->map_subflow_seq = 1;
+	subflow->rel_write_seq = 1;
 
-		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
-		msk->write_seq = subflow->idsn + 1;
-		ack_seq++;
-		msk->ack_seq = ack_seq;
-		msk->can_ack = 1;
-		subflow->map_seq = ack_seq;
-		subflow->map_subflow_seq = 1;
-		subflow->rel_write_seq = 1;
-		bh_unlock_sock(sk);
-		local_bh_enable();
-	}
-	inet_sk_state_store(sk, TCP_ESTABLISHED);
+	/* the socket is not connected yet, no msk/subflow ops can access/race
+	 * accessing the field below
+	 */
+	WRITE_ONCE(msk->remote_key, subflow->remote_key);
+	WRITE_ONCE(msk->local_key, subflow->local_key);
+	WRITE_ONCE(msk->token, subflow->token);
+	WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+	WRITE_ONCE(msk->ack_seq, ack_seq);
+	WRITE_ONCE(msk->can_ack, 1);
 }
 
 static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
@@ -868,12 +835,19 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct socket *ssock;
 	int err;
 
-	ssock = mptcp_socket_create_get(msk, TCP_MAX_STATES);
-	if (IS_ERR(ssock))
-		return PTR_ERR(ssock);
+	lock_sock(sock->sk);
+	ssock = __mptcp_socket_create(msk, TCP_MAX_STATES);
+	if (IS_ERR(ssock)) {
+		err = PTR_ERR(ssock);
+		goto unlock;
+	}
 
 	err = ssock->ops->bind(ssock, uaddr, addr_len);
-	sock_put(ssock->sk);
+	if (!err)
+		mptcp_copy_inaddrs(sock->sk, ssock->sk);
+
+unlock:
+	release_sock(sock->sk);
 	return err;
 }
 
@@ -884,9 +858,12 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	struct socket *ssock;
 	int err;
 
-	ssock = mptcp_socket_create_get(msk, TCP_SYN_SENT);
-	if (IS_ERR(ssock))
-		return PTR_ERR(ssock);
+	lock_sock(sock->sk);
+	ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
+	if (IS_ERR(ssock)) {
+		err = PTR_ERR(ssock);
+		goto unlock;
+	}
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -897,33 +874,19 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 #endif
 
 	err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
-
-	/* should we keep msk socket lock instead? and release it this
-	 * latter state update */
 	inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
-	sock_put(ssock->sk);
+	mptcp_copy_inaddrs(sock->sk, ssock->sk);
+
+unlock:
+	release_sock(sock->sk);
 	return err;
 }
 
 static int mptcp_getname(struct socket *sock, struct sockaddr *uaddr,
 			 int peer, int af)
 {
-	struct mptcp_sock *msk = mptcp_sk(sock->sk);
-	struct socket *ssock;
 	int ret;
 
-	lock_sock(sock->sk);
-	ssock = __mptcp_nmpc_socket(msk);
-	if (ssock)
-		sock_hold(ssock->sk);
-	release_sock(sock->sk);
-	if (ssock) {
-		pr_debug("subflow=%p", ssock->sk);
-		ret = ssock->ops->getname(ssock, uaddr, peer);
-		sock_put(ssock->sk);
-		return ret;
-	}
-
 	switch (af) {
 	case AF_INET:
 		ret = inet_getname(sock, uaddr, peer);
@@ -987,16 +950,20 @@ static int mptcp_listen(struct socket *sock, int backlog)
 
 	pr_debug("msk=%p", msk);
 
-	ssock = mptcp_socket_create_get(msk, TCP_LISTEN);
-	if (IS_ERR(ssock))
-		return PTR_ERR(ssock);
+	lock_sock(sock->sk);
+	ssock = __mptcp_socket_create(msk, TCP_LISTEN);
+	if (IS_ERR(ssock)) {
+		err = PTR_ERR(ssock);
+		goto unlock;
+	}
 
 	err = ssock->ops->listen(ssock, backlog);
-
-	/* should we keep msk socket lock instead? and release it this
-	 * latter state update */
 	inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
-	sock_put(ssock->sk);
+	if (!err)
+		mptcp_copy_inaddrs(sock->sk, ssock->sk);
+
+unlock:
+	release_sock(sock->sk);
 	return err;
 }
 
@@ -1065,10 +1032,8 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 	lock_sock(sk);
 	ssock = __mptcp_nmpc_socket(msk);
 	if (ssock) {
-		sock_hold(ssock->sk);
-		release_sock(sk);
 		mask = ssock->ops->poll(file, ssock, wait);
-		sock_put(ssock->sk);
+		release_sock(sk);
 		return mask;
 	}
 
@@ -1092,7 +1057,6 @@ static int mptcp_shutdown(struct socket *sock, int how)
 {
 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
 	struct mptcp_subflow_context *subflow;
-	struct socket *ssock;
 	int ret = 0;
 
 	pr_debug("sk=%p, how=%d", msk, how);
@@ -1102,15 +1066,6 @@ static int mptcp_shutdown(struct socket *sock, int how)
 	if (how == SHUT_WR || how == SHUT_RDWR)
 		inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
 
-	ssock = __mptcp_tcp_fallback_get(msk);
-	if (ssock) {
-		release_sock(sock->sk);
-		pr_debug("subflow=%p", ssock->sk);
-		ret = kernel_sock_shutdown(ssock, how);
-		sock_put(ssock->sk);
-		return ret;
-	}
-
 	how++;
 
 	if ((how & ~SHUTDOWN_MASK) || !how) {
-- 
2.21.0

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2019-12-09 23:52 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-09 23:52 [MPTCP] [PATCH v1 4/4] mptcp: avoid acquiring the msk lock in mptcp_finisch_connect() Paolo Abeni

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.