* [RFC PATCH 2/3] mptcp: full disconnect implementation
2021-11-05 10:49 [RFC PATCH 0/3] mptcp: improve accept() and disconnect() Paolo Abeni
2021-11-05 10:49 ` [RFC PATCH 1/3] mptcp: never allow the PM to close a listener subflow Paolo Abeni
@ 2021-11-05 10:49 ` Paolo Abeni
2021-11-05 10:49 ` [RFC PATCH 3/3] mptcp: cleanup accept and poll Paolo Abeni
2021-11-06 0:27 ` [RFC PATCH 0/3] mptcp: improve accept() and disconnect() Mat Martineau
3 siblings, 0 replies; 9+ messages in thread
From: Paolo Abeni @ 2021-11-05 10:49 UTC (permalink / raw)
To: MPTCP Upstream
The current mptcp_disconnect() implementation lacks several
steps, we additionally need to reset the msk socket state
and flush the subflow list.
Factor out the needed helper to avoid code duplication.
Additionally ensure that the initial subflow is disposed
only after mptcp_close(), just reset it at disconnect time.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
net/mptcp/pm.c | 10 +++--
net/mptcp/protocol.c | 95 ++++++++++++++++++++++++++++++++------------
net/mptcp/protocol.h | 14 +++++++
net/mptcp/token.c | 1 +
4 files changed, 92 insertions(+), 28 deletions(-)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 6ab386ff3294..6b2bfe89d445 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -356,7 +356,7 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
}
}
-void mptcp_pm_data_init(struct mptcp_sock *msk)
+void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
msk->pm.add_addr_accepted = 0;
@@ -371,10 +371,14 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;
+ mptcp_pm_nl_data_init(msk);
+}
+
+void mptcp_pm_data_init(struct mptcp_sock *msk)
+{
spin_lock_init(&msk->pm.lock);
INIT_LIST_HEAD(&msk->pm.anno_list);
-
- mptcp_pm_nl_data_init(msk);
+ mptcp_pm_data_reset(msk);
}
void __init mptcp_pm_init(void)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1ee3530ac2a3..5fe19adb88a5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2226,22 +2226,33 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
* parent socket.
*/
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
- struct mptcp_subflow_context *subflow)
+ struct mptcp_subflow_context *subflow,
+ bool check_pending)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- bool need_push;
+ bool need_push, dispose_it;
- list_del(&subflow->node);
+ dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+ if (dispose_it)
+ list_del(&subflow->node);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ need_push = check_pending && __mptcp_retransmit_pending_data(sk);
+ if (!dispose_it) {
+ tcp_disconnect(ssk, 0);
+ mptcp_subflow_ctx_reset(subflow);
+ release_sock(ssk);
+
+ goto out;
+ }
+
/* if we are invoked by the msk cleanup code, the subflow is
* already orphaned
*/
if (ssk->sk_socket)
sock_orphan(ssk);
- need_push = __mptcp_retransmit_pending_data(sk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2261,14 +2272,12 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
sock_put(ssk);
- if (ssk == msk->last_snd)
- msk->last_snd = NULL;
-
if (ssk == msk->first)
msk->first = NULL;
- if (msk->subflow && ssk == msk->subflow->sk)
- mptcp_dispose_initial_subflow(msk);
+out:
+ if (ssk == msk->last_snd)
+ msk->last_snd = NULL;
if (need_push)
__mptcp_push_pending(sk, 0);
@@ -2279,7 +2288,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
{
if (sk->sk_state == TCP_ESTABLISHED)
mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
- __mptcp_close_ssk(sk, ssk, subflow);
+ __mptcp_close_ssk(sk, ssk, subflow, true);
}
static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
@@ -2501,9 +2510,20 @@ static int __mptcp_init_sock(struct sock *sk)
return 0;
}
-static int mptcp_init_sock(struct sock *sk)
+static void mptcp_ca_reset(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_assign_congestion_control(sk);
+ strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+
+ /* no need to keep a reference to the ops, the name will suffice */
+ tcp_cleanup_congestion_control(sk);
+ icsk->icsk_ca_ops = NULL;
+}
+
+static int mptcp_init_sock(struct sock *sk)
+{
struct net *net = sock_net(sk);
int ret;
@@ -2524,12 +2544,7 @@ static int mptcp_init_sock(struct sock *sk)
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
* propagate the correct value
*/
- tcp_assign_congestion_control(sk);
- strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
-
- /* no need to keep a reference to the ops, the name will suffice */
- tcp_cleanup_congestion_control(sk);
- icsk->icsk_ca_ops = NULL;
+ mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
@@ -2687,9 +2702,13 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
+ /* clears msk->subflow, allowing the following loop to close
+ * even the initial subflow
+ */
+ mptcp_dispose_initial_subflow(msk);
list_for_each_entry_safe(subflow, tmp, &conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- __mptcp_close_ssk(sk, ssk, subflow);
+ __mptcp_close_ssk(sk, ssk, subflow, false);
}
sk->sk_prot->destroy(sk);
@@ -2700,7 +2719,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
- mptcp_dispose_initial_subflow(msk);
sock_put(sk);
}
@@ -2736,6 +2754,9 @@ static void mptcp_close(struct sock *sk, long timeout)
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
+ if (mptcp_sk(sk)->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
do_cancel_work = true;
@@ -2746,9 +2767,6 @@ static void mptcp_close(struct sock *sk, long timeout)
if (do_cancel_work)
mptcp_cancel_work(sk);
- if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
-
sock_put(sk);
}
@@ -2782,13 +2800,40 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_do_flush_join_list(msk);
+ inet_sk_state_store(sk, TCP_CLOSE);
+
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- lock_sock(ssk);
- tcp_disconnect(ssk, flags);
- release_sock(ssk);
+ __mptcp_close_ssk(sk, ssk, subflow, false);
}
+
+ sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
+
+ if (mptcp_sk(sk)->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+
+ mptcp_destroy_common(msk);
+ msk->last_snd = 0;
+ msk->flags = 0;
+ msk->recovery = false;
+ msk->can_ack = false;
+ msk->fully_established = false;
+ msk->rcv_data_fin = false;
+ msk->snd_data_fin_enable = false;
+ msk->rcv_fastclose = false;
+ msk->use_64bit_ack = false;
+ WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ mptcp_pm_data_reset(msk);
+ mptcp_ca_reset(sk);
+
+#if IS_ENABLED(CONFIG_KASAN)
+ sock_set_flag(sk, SOCK_RCU_FREE);
+#else
+ sock_reset_flag(sk, SOCK_RCU_FREE);
+#endif
+
return 0;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f6f4054e19ee..188b9e044672 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -392,6 +392,9 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
+
+ char reset_start[0];
+
unsigned long avg_pacing_rate; /* protected by msk socket lock */
u64 local_key;
u64 remote_key;
@@ -438,6 +441,9 @@ struct mptcp_subflow_context {
u8 stale_count;
long delegated_status;
+
+ char reset_end[0];
+
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
u32 setsockopt_seq;
@@ -469,6 +475,13 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
return subflow->tcp_sock;
}
+static inline void
+mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
+{
+ memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start);
+ subflow->request_mptcp = 1;
+}
+
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
@@ -708,6 +721,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
+void mptcp_pm_data_reset(struct mptcp_sock *msk);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index e581b341c5be..f52ee7b26aed 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -384,6 +384,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk)
bucket->chain_len--;
}
spin_unlock_bh(&bucket->lock);
+ WRITE_ONCE(msk->token, 0);
}
void __init mptcp_token_init(void)
--
2.26.3
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [RFC PATCH 3/3] mptcp: cleanup accept and poll
2021-11-05 10:49 [RFC PATCH 0/3] mptcp: improve accept() and disconnect() Paolo Abeni
2021-11-05 10:49 ` [RFC PATCH 1/3] mptcp: never allow the PM to close a listener subflow Paolo Abeni
2021-11-05 10:49 ` [RFC PATCH 2/3] mptcp: full disconnect implementation Paolo Abeni
@ 2021-11-05 10:49 ` Paolo Abeni
2021-11-05 10:59 ` mptcp: cleanup accept and poll: Build Failure MPTCP CI
2021-11-06 0:27 ` [RFC PATCH 0/3] mptcp: improve accept() and disconnect() Mat Martineau
3 siblings, 1 reply; 9+ messages in thread
From: Paolo Abeni @ 2021-11-05 10:49 UTC (permalink / raw)
To: MPTCP Upstream
After the previous patch, msk->subflow will never be deleted during
the whole msk lifetime. We don't need anymore to acquire references to
it in mptcp_stream_accept() and we can use the listener subflow accept
queue to simplify mptcp_poll() for listener socket.
Overall this removes a lock pair and 4 more atomic operations per accept().
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
net/mptcp/protocol.c | 25 +++++++------------------
net/mptcp/protocol.h | 1 -
net/mptcp/subflow.c | 1 -
3 files changed, 7 insertions(+), 20 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5fe19adb88a5..38f5b4bcba69 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3402,17 +3402,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- lock_sock(sock->sk);
- if (sock->sk->sk_state != TCP_LISTEN)
- goto unlock_fail;
-
ssock = __mptcp_nmpc_socket(msk);
if (!ssock)
- goto unlock_fail;
-
- clear_bit(MPTCP_DATA_READY, &msk->flags);
- sock_hold(ssock->sk);
- release_sock(sock->sk);
+ return -EINVAL;
err = ssock->ops->accept(sock, newsock, flags, kern);
if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
@@ -3452,14 +3444,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
release_sock(newsk);
}
- if (inet_csk_listen_poll(ssock->sk))
- set_bit(MPTCP_DATA_READY, &msk->flags);
- sock_put(ssock->sk);
return err;
-
-unlock_fail:
- release_sock(sock->sk);
- return -EINVAL;
}
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
@@ -3505,8 +3490,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
- if (state == TCP_LISTEN)
- return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : 0;
+ if (state == TCP_LISTEN) {
+ if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk))
+ return 0;
+
+ return inet_csk_listen_poll(msk->subflow->sk);
+ }
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 188b9e044672..b2d016f8803a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -111,7 +111,6 @@
#define MPTCP_RST_TRANSIENT BIT(0)
/* MPTCP socket flags */
-#define MPTCP_DATA_READY 0
#define MPTCP_NOSPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 72ccbd85941b..4e7395cfab9b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1293,7 +1293,6 @@ static void subflow_data_ready(struct sock *sk)
if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
return;
- set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
--
2.26.3
^ permalink raw reply related [flat|nested] 9+ messages in thread