* [MPTCP] [PATCH net-next] Squash-to: "mptcp: refactor shutdown and close"
@ 2020-10-20 16:25 Paolo Abeni
From: Paolo Abeni @ 2020-10-20 16:25 UTC
  To: mptcp

We must release the ssk socket only after orphaning it,
or we may hit a use-after-free (UaF) in the receive path.

Beyond the critical error path reported by syzkaller,
there is also the scenario of a subflow being closed by the PM.
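
For reference, below is a minimal sketch of the ordering the patch
enforces, written as standalone C with toy_* placeholder types and
helpers (not the kernel APIs): the disposal decision and the orphaning
happen under the ssk lock, and the inode reference is dropped only
after the lock is released.

#include <stdbool.h>
#include <stdio.h>

struct toy_socket { int inode_refs; };
struct toy_sock {
	struct toy_socket *sk_socket;     /* NULL once orphaned */
	struct toy_socket *parent_socket; /* the msk's own socket */
};

static void toy_lock(struct toy_sock *ssk)    { (void)ssk; }
static void toy_unlock(struct toy_sock *ssk)  { (void)ssk; }
static void toy_orphan(struct toy_sock *ssk)  { ssk->sk_socket = NULL; }
static void toy_iput(struct toy_socket *sock) { sock->inode_refs--; }

static void toy_close_ssk(struct toy_sock *ssk)
{
	bool dispose_socket = false;
	struct toy_socket *sock;

	toy_lock(ssk);
	/* check and orphan under the ssk lock, so the receive path
	 * cannot race with the disposal below
	 */
	sock = ssk->sk_socket;
	if (sock) {
		dispose_socket = sock != ssk->parent_socket;
		toy_orphan(ssk);
	}
	toy_unlock(ssk);

	/* only outgoing subflows own an extra inode reference */
	if (dispose_socket)
		toy_iput(sock);
}

int main(void)
{
	struct toy_socket parent = { 1 }, outgoing = { 1 };
	struct toy_sock ssk = { &outgoing, &parent };

	toy_close_ssk(&ssk);
	printf("outgoing inode refs: %d\n", outgoing.inode_refs); /* 0 */
	return 0;
}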

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
Should fix issue 101, but I actually tested this only against the self-tests
---
 net/mptcp/protocol.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a32c27fe525c..2240fd370014 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1746,16 +1746,22 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
 void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 		       struct mptcp_subflow_context *subflow)
 {
-	struct socket *sock = READ_ONCE(ssk->sk_socket);
+	bool dispose_socket = false;
+	struct socket *sock;
 
 	list_del(&subflow->node);
 
-	/* outgoing subflow */
-	if (sock && sock != sk->sk_socket)
-		iput(SOCK_INODE(sock));
-
 	lock_sock(ssk);
 
+	/* if we are invoked by the msk cleanup code, the subflow is
+	 * already orphaned
+	 */
+	sock = ssk->sk_socket;
+	if (sock) {
+		dispose_socket = sock != sk->sk_socket;
+		sock_orphan(ssk);
+	}
+
 	/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
 	 * the ssk has been already destroyed, we just need to release the
 	 * reference owned by msk;
@@ -1771,6 +1777,9 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 		__sock_put(ssk);
 	}
 	release_sock(ssk);
+	if (dispose_socket)
+		iput(SOCK_INODE(sock));
+
 	sock_put(ssk);
 }
 
@@ -2165,15 +2174,20 @@ static void mptcp_close(struct sock *sk, long timeout)
 	inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
 	list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		struct socket *sock = READ_ONCE(ssk->sk_socket);
-		bool slow;
-
-		if (sock && sock != sk->sk_socket)
-			iput(SOCK_INODE(sock));
+		bool slow, dispose_socket;
+		struct socket *sock;
 
 		slow = lock_sock_fast(ssk);
+		sock = ssk->sk_socket;
+		dispose_socket = sock && sock != sk->sk_socket;
 		sock_orphan(ssk);
 		unlock_sock_fast(ssk, slow);
+
+		/* for the outgoing subflows we additionally need to free
+		 * the associated socket
+		 */
+		if (dispose_socket)
+			iput(SOCK_INODE(sock));
 	}
 	sock_orphan(sk);
 
-- 
2.26.2

* [MPTCP] [PATCH net-next] Squash-to: "mptcp: refactor shutdown and close"
@ 2020-11-04 17:46 Paolo Abeni
From: Paolo Abeni @ 2020-11-04 17:46 UTC
  To: mptcp

The msk status for fallback sockets is not tracked correctly.
We must switch the msk to TCP_CLOSE only after the subflow has
moved there, or the worker will not run and pending MPTCP data
will not be spooled after close().
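
To illustrate the EOF handling touched below, here is a minimal
standalone sketch (plain C; the toy_* names and constants are
placeholders, not the kernel's): as in the reworked
mptcp_check_for_eof(), we bail out while any subflow can still
receive data, and move the msk to CLOSE only once both directions
are shut down.

#include <stdio.h>

#define TOY_RCV_SHUTDOWN  1
#define TOY_SEND_SHUTDOWN 2
#define TOY_SHUTDOWN_MASK (TOY_RCV_SHUTDOWN | TOY_SEND_SHUTDOWN)

enum toy_state { TOY_ESTABLISHED, TOY_CLOSE };

struct toy_subflow { int rx_eof; };
struct toy_msk {
	enum toy_state state;
	int shutdown;
	struct toy_subflow subflows[2];
	int nr_subflows;
};

static void toy_check_for_eof(struct toy_msk *msk)
{
	int receivers = 0, i;

	for (i = 0; i < msk->nr_subflows; i++)
		receivers += !msk->subflows[i].rx_eof;
	if (receivers)
		return;

	/* all subflows hit EOF: propagate the receive shutdown ... */
	msk->shutdown |= TOY_RCV_SHUTDOWN;

	/* ... and close only when the send side is shut down too */
	if (msk->state != TOY_CLOSE && msk->shutdown == TOY_SHUTDOWN_MASK)
		msk->state = TOY_CLOSE;
}

int main(void)
{
	struct toy_msk msk = {
		.state = TOY_ESTABLISHED,
		.shutdown = TOY_SEND_SHUTDOWN,
		.subflows = { { 1 }, { 1 } },
		.nr_subflows = 2,
	};

	toy_check_for_eof(&msk);
	printf("msk state: %s\n",
	       msk.state == TOY_CLOSE ? "CLOSE" : "ESTABLISHED");
	return 0;
}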

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 13 +++++++++----
 net/mptcp/subflow.c  |  1 -
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a6bd06c724d5..1ef980d4a646 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -764,8 +764,10 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
 
 	mptcp_for_each_subflow(msk, subflow)
 		receivers += !subflow->rx_eof;
+	if (receivers)
+		return;
 
-	if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+	if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
 		/* hopefully temporary hack: propagate shutdown status
 		 * to msk, when all subflows agree on it
 		 */
@@ -775,6 +777,8 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
 		set_bit(MPTCP_DATA_READY, &msk->flags);
 		sk->sk_data_ready(sk);
 	}
+	if (sk->sk_state != TCP_CLOSE && sk->sk_shutdown == SHUTDOWN_MASK)
+		inet_sk_state_store(sk, TCP_CLOSE);
 }
 
 static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
@@ -2157,11 +2161,12 @@ static void mptcp_close(struct sock *sk, long timeout)
 	lock_sock(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
-	if (sk->sk_state == TCP_LISTEN ||
-	    __mptcp_check_fallback((struct mptcp_sock *)sk)) {
+	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
 		inet_sk_state_store(sk, TCP_CLOSE);
 		goto cleanup;
-	} else if (sk->sk_state == TCP_CLOSE) {
+	} else if (__mptcp_check_fallback((struct mptcp_sock *)sk)) {
+		if (!mptcp_send_pending(sk))
+			inet_sk_state_store(sk, TCP_CLOSE);
 		goto cleanup;
 	}
 
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1e9a72af67dc..fd356dc9d580 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1282,7 +1282,6 @@ static void subflow_state_change(struct sock *sk)
 		mptcp_data_ready(parent, sk);
 
 	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
-	    !(parent->sk_shutdown & RCV_SHUTDOWN) &&
 	    !subflow->rx_eof && subflow_is_done(sk)) {
 		subflow->rx_eof = 1;
 		mptcp_subflow_eof(parent);
-- 
2.26.2

* [MPTCP] [PATCH net-next] Squash-to: "mptcp: refactor shutdown and close"
@ 2020-10-15 21:28 Paolo Abeni
From: Paolo Abeni @ 2020-10-15 21:28 UTC
  To: mptcp

After the refactor, __mptcp_close_ssk() can be invoked on an
orphaned TCP socket, so we can't wait for a timeout, or
sk_stream_wait_close() will oops on the NULL waitqueue.

We can simply always use a 0 timeout (and clean up the related
signatures accordingly): the timeout, if any, has already been
consumed by mptcp_close().
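
A rough sketch of why a zero timeout sidesteps the oops (standalone C
with toy types; in the kernels this series targets, the corresponding
guard sits in sk_stream_wait_close(), which only touches the socket
waitqueue when the timeout is non-zero):

#include <stdio.h>

struct toy_waitqueue { int waiters; };
struct toy_sock { struct toy_waitqueue *wq; /* NULL once orphaned */ };

static void toy_wait_close(struct toy_sock *sk, long timeout)
{
	/* the wait path is only entered for a non-zero timeout, so a
	 * 0 timeout never dereferences a possibly NULL waitqueue
	 */
	if (timeout) {
		struct toy_waitqueue *wq = sk->wq; /* would oops if NULL */

		wq->waiters++;
	}
}

int main(void)
{
	struct toy_sock orphaned = { NULL };

	toy_wait_close(&orphaned, 0); /* safe: wait path skipped */
	printf("zero timeout: no wait, no oops\n");
	return 0;
}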

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
This should fix issue 100, but I only build-tested it ;)
---
 net/mptcp/pm_netlink.c |  6 ++----
 net/mptcp/protocol.c   | 19 +++++++++----------
 net/mptcp/protocol.h   |  3 +--
 3 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index ed60538df7b2..7599b0f14920 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -417,14 +417,13 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
 	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
-		long timeout = 0;
 
 		if (msk->pm.rm_id != subflow->remote_id)
 			continue;
 
 		spin_unlock_bh(&msk->pm.lock);
 		mptcp_subflow_shutdown(sk, ssk, how);
-		__mptcp_close_ssk(sk, ssk, subflow, timeout);
+		__mptcp_close_ssk(sk, ssk, subflow);
 		spin_lock_bh(&msk->pm.lock);
 
 		msk->pm.add_addr_accepted--;
@@ -453,14 +452,13 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
 	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
-		long timeout = 0;
 
 		if (rm_id != subflow->local_id)
 			continue;
 
 		spin_unlock_bh(&msk->pm.lock);
 		mptcp_subflow_shutdown(sk, ssk, how);
-		__mptcp_close_ssk(sk, ssk, subflow, timeout);
+		__mptcp_close_ssk(sk, ssk, subflow);
 		spin_lock_bh(&msk->pm.lock);
 
 		msk->pm.local_addr_used--;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0f3dae1dabc9..a6205a940977 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -42,7 +42,7 @@ struct mptcp_skb_cb {
 
 static struct percpu_counter mptcp_sockets_allocated;
 
-static void __mptcp_destroy_sock(struct sock *sk, int timeout);
+static void __mptcp_destroy_sock(struct sock *sk);
 
 /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
  * completed yet or has failed, return the subflow socket.
@@ -1711,8 +1711,7 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
  * parent socket.
  */
 void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
-		       struct mptcp_subflow_context *subflow,
-		       long timeout)
+		       struct mptcp_subflow_context *subflow)
 {
 	struct socket *sock = READ_ONCE(ssk->sk_socket);
 
@@ -1733,7 +1732,7 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	} else {
 		/* otherwise ask tcp do dispose of ssk and subflow ctx */
 		subflow->disposable = 1;
-		__tcp_close(ssk, timeout);
+		__tcp_close(ssk, 0);
 
 		/* close acquired an extra ref */
 		__sock_put(ssk);
@@ -1784,7 +1783,7 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
 		if (inet_sk_state_load(ssk) != TCP_CLOSE)
 			continue;
 
-		__mptcp_close_ssk((struct sock *)msk, ssk, subflow, 0);
+		__mptcp_close_ssk((struct sock *)msk, ssk, subflow);
 	}
 }
 
@@ -1850,7 +1849,7 @@ static void mptcp_worker(struct work_struct *work)
 	    (mptcp_check_close_timeout(sk) ||
 	    (state != sk->sk_state && sk->sk_state == TCP_CLOSE))) {
 		inet_sk_state_store(sk, TCP_CLOSE);
-		__mptcp_destroy_sock(sk, 0);
+		__mptcp_destroy_sock(sk);
 		goto unlock;
 	}
 
@@ -2077,13 +2076,13 @@ static void __mptcp_wr_shutdown(struct sock *sk)
 	__mptcp_check_send_data_fin(sk);
 }
 
-static void __mptcp_destroy_sock(struct sock *sk, int timeout)
+static void __mptcp_destroy_sock(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	LIST_HEAD(conn_list);
 
-	pr_debug("msk=%p timeout=%d", msk, timeout);
+	pr_debug("msk=%p", msk);
 
 	/* be sure to always acquire the join list lock, to sync vs
 	 * mptcp_finish_join().
@@ -2099,7 +2098,7 @@ static void __mptcp_destroy_sock(struct sock *sk, int timeout)
 
 	list_for_each_entry_safe(subflow, tmp, &conn_list, node) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		__mptcp_close_ssk(sk, ssk, subflow, timeout);
+		__mptcp_close_ssk(sk, ssk, subflow);
 	}
 
 	sk->sk_prot->destroy(sk);
@@ -2151,7 +2150,7 @@ static void mptcp_close(struct sock *sk, long timeout)
 	sock_hold(sk);
 	pr_debug("msk=%p state=%d", sk, sk->sk_state);
 	if (sk->sk_state == TCP_CLOSE) {
-		__mptcp_destroy_sock(sk, timeout);
+		__mptcp_destroy_sock(sk);
 		do_cancel_work = true;
 	} else {
 		sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 408c212e8e1c..4a434689c63c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -412,8 +412,7 @@ void __init mptcp_subflow_init(void);
 void mptcp_subflow_reset(struct sock *ssk);
 void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
 void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
-		       struct mptcp_subflow_context *subflow,
-		       long timeout);
+		       struct mptcp_subflow_context *subflow);
 
 /* called with sk socket lock held */
 int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
-- 
2.26.2

* [MPTCP] [PATCH net-next] Squash-to: "mptcp: refactor shutdown and close"
@ 2020-10-15 14:30 Davide Caratti
From: Davide Caratti @ 2020-10-15 14:30 UTC
  To: mptcp

When the DATA_FIN is acked on all subflows, the socket goes into the
FIN_WAIT_2 state. Call __mptcp_destroy_sock() to transmit a TCP FIN.
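
The check added below relies on the usual TCPF_* bitmask idiom:
testing (1 << state) against an OR of TCPF_ flags asks whether the
state belongs to a set in a single comparison. A small standalone
illustration (local TOY_* copies of the constants; in the kernel they
come from the TCP state definitions, where TCPF_FOO is (1 << TCP_FOO)):

#include <stdio.h>

enum { TOY_FIN_WAIT2 = 5, TOY_CLOSE = 7, TOY_LISTEN = 10 };
#define TOYF_FIN_WAIT2 (1 << TOY_FIN_WAIT2)
#define TOYF_CLOSE     (1 << TOY_CLOSE)

/* "is the state CLOSE or FIN_WAIT2?" as one mask test */
static int state_allows_destroy(int state)
{
	return !!((1 << state) & (TOYF_CLOSE | TOYF_FIN_WAIT2));
}

int main(void)
{
	printf("FIN_WAIT2 -> %d\n", state_allows_destroy(TOY_FIN_WAIT2)); /* 1 */
	printf("LISTEN    -> %d\n", state_allows_destroy(TOY_LISTEN));    /* 0 */
	return 0;
}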

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/98
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 net/mptcp/protocol.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5b32741f2af3..95566f120e72 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1859,7 +1859,8 @@ static void mptcp_worker(struct work_struct *work)
 	 */
 	if (sock_flag(sk, SOCK_DEAD) &&
 	    (mptcp_check_close_timeout(sk) ||
-	    (state != sk->sk_state && sk->sk_state == TCP_CLOSE))) {
+	    (state != sk->sk_state &&
+	    ((1 << inet_sk_state_load(sk)) & (TCPF_CLOSE | TCPF_FIN_WAIT2))))) {
 		inet_sk_state_store(sk, TCP_CLOSE);
 		__mptcp_destroy_sock(sk, 0);
 		goto unlock;
-- 
2.26.2

* [MPTCP] [PATCH net-next] Squash-to: "mptcp: refactor shutdown and close"
@ 2020-10-05 11:07 Paolo Abeni
From: Paolo Abeni @ 2020-10-05 11:07 UTC
  To: mptcp

With the additional state machine handling, fallback sockets will
go through an unneeded timeout, because they never get a DATA_FIN
from the peer.

In case of fallback, we can just close the msk and invoke
tcp_close() directly on the subflow.

This fixes the diag.sh self-test failures in the current export
branch.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 29be694c1d94..7d5905968fac 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2102,7 +2102,8 @@ static void mptcp_close(struct sock *sk, long timeout)
 	lock_sock(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (sk->sk_state == TCP_LISTEN ||
+	    __mptcp_check_fallback((struct mptcp_sock *)sk)) {
 		inet_sk_state_store(sk, TCP_CLOSE);
 		goto cleanup;
 	} else if (sk->sk_state == TCP_CLOSE) {
-- 
2.26.2
