* [PATCH mptcp-next 0/2] mptcp: a couple of cleanups
From: Paolo Abeni @ 2021-12-06 17:34 UTC
  To: mptcp

The first patch was already shared as an RFC; this iteration
addresses a couple of bugs found there.

The second patch removes a bunch of conditionals and atomic
operations from the fast path. The performance impact is below
noise level, but I still think it's worthwhile: the unneeded
atomic operations are confusing.

Paolo Abeni (2):
  mptcp: cleanup MPJ subflow list handling
  mptcp: avoid atomic bit manipulation when possible

 net/mptcp/pm_netlink.c |   3 -
 net/mptcp/protocol.c   | 149 ++++++++++++++++++-----------------------
 net/mptcp/protocol.h   |  31 ++++-----
 net/mptcp/sockopt.c    |  24 ++-----
 net/mptcp/subflow.c    |   9 ++-
 5 files changed, 89 insertions(+), 127 deletions(-)

-- 
2.33.1



* [PATCH mptcp-next 1/2] mptcp: cleanup MPJ subflow list handling
From: Paolo Abeni @ 2021-12-06 17:34 UTC
  To: mptcp

We can simplify the join list handling by leveraging
mptcp_release_cb(): if we can acquire the msk socket
lock at mptcp_finish_join() time, move the new subflow
directly into the conn_list; otherwise place it on the
join_list and let the release_cb process that list.

Since pending MPJ connections are now always processed
in a timely way, we no longer need to flush the join list
every time we have to process all the current subflows.

Additionally, we can now use the mptcp data lock to protect
the join_list, removing the extra spinlock.

Finally, since the MPJ handshake is now always finalized under
the msk socket lock, we can drop the additional synchronization
between mptcp_finish_join() and mptcp_close().
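
In short, the scheme boils down to the following try-lock-or-defer
pattern (a condensed sketch for illustration only; the real hunks
are in the diff below):

	/* at mptcp_finish_join() time */
	mptcp_data_lock(parent);
	if (!sock_owned_by_user(parent)) {
		/* nobody owns the msk socket lock: finalize the join now */
		ret = __mptcp_finish_join(msk, ssk);
		if (ret) {
			sock_hold(ssk);
			list_add_tail(&subflow->node, &msk->conn_list);
		}
	} else {
		/* the msk socket is owned: park the subflow on join_list
		 * and let mptcp_release_cb() flush it at lock release time
		 */
		sock_hold(ssk);
		list_add_tail(&subflow->node, &msk->join_list);
		set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->flags);
	}
	mptcp_data_unlock(parent);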

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
rfc -> v1:
 - fix list corruption in __mptcp_flush_join_list()
 - fix label name typo (Mat)
---
 net/mptcp/pm_netlink.c |   3 --
 net/mptcp/protocol.c   | 109 +++++++++++++++++------------------------
 net/mptcp/protocol.h   |  15 +-----
 net/mptcp/sockopt.c    |  24 +++------
 net/mptcp/subflow.c    |   5 +-
 5 files changed, 56 insertions(+), 100 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 48963ae4c610..3162e8d25d05 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -165,7 +165,6 @@ select_local_address(const struct pm_nl_pernet *pernet,
 	msk_owned_by_me(msk);
 
 	rcu_read_lock();
-	__mptcp_flush_join_list(msk);
 	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
 		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
 			continue;
@@ -544,7 +543,6 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
 	subflows_max = mptcp_pm_get_subflows_max(msk);
 
 	rcu_read_lock();
-	__mptcp_flush_join_list(msk);
 	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
 		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
 			continue;
@@ -633,7 +631,6 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 	    !mptcp_pm_should_rm_signal(msk))
 		return;
 
-	__mptcp_flush_join_list(msk);
 	subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
 	if (subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index accd109fd86d..38651c80b321 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -808,47 +808,31 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	mptcp_data_unlock(sk);
 }
 
-static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
+static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 {
-	struct mptcp_subflow_context *subflow;
-	bool ret = false;
-
-	if (likely(list_empty(&msk->join_list)))
+	if (((struct sock *)msk)->sk_state != TCP_ESTABLISHED)
 		return false;
 
-	spin_lock_bh(&msk->join_list_lock);
-	list_for_each_entry(subflow, &msk->join_list, node) {
-		u32 sseq = READ_ONCE(subflow->setsockopt_seq);
-
-		mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
-		if (READ_ONCE(msk->setsockopt_seq) != sseq)
-			ret = true;
-	}
-	list_splice_tail_init(&msk->join_list, &msk->conn_list);
-	spin_unlock_bh(&msk->join_list_lock);
-
-	return ret;
-}
-
-void __mptcp_flush_join_list(struct mptcp_sock *msk)
-{
-	if (likely(!mptcp_do_flush_join_list(msk)))
-		return;
-
-	if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
-		mptcp_schedule_work((struct sock *)msk);
+	mptcp_propagate_sndbuf((struct sock *)msk, ssk);
+	mptcp_sockopt_sync_locked(msk, ssk);
+	WRITE_ONCE(msk->allow_infinite_fallback, false);
+	return true;
 }
 
-static void mptcp_flush_join_list(struct mptcp_sock *msk)
+static void __mptcp_flush_join_list(struct sock *sk)
 {
-	bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);
-
-	might_sleep();
+	struct mptcp_subflow_context *tmp, *subflow;
+	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	if (!mptcp_do_flush_join_list(msk) && !sync_needed)
-		return;
+	list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		bool slow = lock_sock_fast(ssk);
 
-	mptcp_sockopt_sync_all(msk);
+		list_move_tail(&subflow->node, &msk->conn_list);
+		if (!__mptcp_finish_join(msk, ssk))
+			mptcp_subflow_reset(ssk);
+		unlock_sock_fast(ssk, slow);
+	}
 }
 
 static bool mptcp_timer_pending(struct sock *sk)
@@ -1568,7 +1552,6 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 			int ret = 0;
 
 			prev_ssk = ssk;
-			__mptcp_flush_join_list(msk);
 			ssk = mptcp_subflow_get_send(msk);
 
 			/* First check. If the ssk has changed since
@@ -1973,7 +1956,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 	unsigned int moved = 0;
 	bool ret, done;
 
-	mptcp_flush_join_list(msk);
 	do {
 		struct sock *ssk = mptcp_subflow_recv_lookup(msk);
 		bool slowpath;
@@ -2510,7 +2492,6 @@ static void mptcp_worker(struct work_struct *work)
 		goto unlock;
 
 	mptcp_check_data_fin_ack(sk);
-	mptcp_flush_join_list(msk);
 
 	mptcp_check_fastclose(msk);
 
@@ -2549,8 +2530,6 @@ static int __mptcp_init_sock(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	spin_lock_init(&msk->join_list_lock);
-
 	INIT_LIST_HEAD(&msk->conn_list);
 	INIT_LIST_HEAD(&msk->join_list);
 	INIT_LIST_HEAD(&msk->rtx_queue);
@@ -2729,7 +2708,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
 		}
 	}
 
-	mptcp_flush_join_list(msk);
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
 
@@ -2762,12 +2740,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
 	might_sleep();
 
-	/* be sure to always acquire the join list lock, to sync vs
-	 * mptcp_finish_join().
-	 */
-	spin_lock_bh(&msk->join_list_lock);
-	list_splice_tail_init(&msk->join_list, &msk->conn_list);
-	spin_unlock_bh(&msk->join_list_lock);
+	/* join list will be eventually flushed (with rst) at sock lock release time */
 	list_splice_init(&msk->conn_list, &conn_list);
 
 	sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
@@ -2870,8 +2843,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	struct mptcp_subflow_context *subflow;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	mptcp_do_flush_join_list(msk);
-
 	inet_sk_state_store(sk, TCP_CLOSE);
 
 	mptcp_for_each_subflow(msk, subflow) {
@@ -3102,6 +3073,8 @@ static void mptcp_release_cb(struct sock *sk)
 			flags |= BIT(MPTCP_PUSH_PENDING);
 		if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
 			flags |= BIT(MPTCP_RETRANSMIT);
+		if (test_and_clear_bit(MPTCP_FLUSH_JOIN_LIST, &mptcp_sk(sk)->flags))
+			flags |= BIT(MPTCP_FLUSH_JOIN_LIST);
 		if (!flags)
 			break;
 
@@ -3114,6 +3087,8 @@ static void mptcp_release_cb(struct sock *sk)
 		 */
 
 		spin_unlock_bh(&sk->sk_lock.slock);
+		if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
+			__mptcp_flush_join_list(sk);
 		if (flags & BIT(MPTCP_PUSH_PENDING))
 			__mptcp_push_pending(sk, 0);
 		if (flags & BIT(MPTCP_RETRANSMIT))
@@ -3259,7 +3234,7 @@ bool mptcp_finish_join(struct sock *ssk)
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 	struct sock *parent = (void *)msk;
 	struct socket *parent_sock;
-	bool ret;
+	bool ret = true;
 
 	pr_debug("msk=%p, subflow=%p", msk, subflow);
 
@@ -3272,24 +3247,32 @@ bool mptcp_finish_join(struct sock *ssk)
 	if (!msk->pm.server_side)
 		goto out;
 
-	if (!mptcp_pm_allow_new_subflow(msk)) {
-		subflow->reset_reason = MPTCP_RST_EPROHIBIT;
-		return false;
-	}
+	if (!mptcp_pm_allow_new_subflow(msk))
+		goto err_prohibited;
 
-	/* active connections are already on conn_list, and we can't acquire
-	 * msk lock here.
-	 * use the join list lock as synchronization point and double-check
-	 * msk status to avoid racing with __mptcp_destroy_sock()
+	if (WARN_ON_ONCE(!list_empty(&subflow->node)))
+		goto err_prohibited;
+
+	/* active connections are already on conn_list.
+	 * If we can't acquire msk socket lock here, let the release callback
+	 * handle it
 	 */
-	spin_lock_bh(&msk->join_list_lock);
-	ret = inet_sk_state_load(parent) == TCP_ESTABLISHED;
-	if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) {
-		list_add_tail(&subflow->node, &msk->join_list);
+	mptcp_data_lock(parent);
+	if (!sock_owned_by_user(parent)) {
+		ret = __mptcp_finish_join(msk, ssk);
+		if (ret) {
+			sock_hold(ssk);
+			list_add_tail(&subflow->node, &msk->conn_list);
+		}
+	} else {
 		sock_hold(ssk);
+		list_add_tail(&subflow->node, &msk->join_list);
+		set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->flags);
 	}
-	spin_unlock_bh(&msk->join_list_lock);
+	mptcp_data_unlock(parent);
+
 	if (!ret) {
+err_prohibited:
 		subflow->reset_reason = MPTCP_RST_EPROHIBIT;
 		return false;
 	}
@@ -3300,8 +3283,9 @@ bool mptcp_finish_join(struct sock *ssk)
 	parent_sock = READ_ONCE(parent->sk_socket);
 	if (parent_sock && !ssk->sk_socket)
 		mptcp_sock_graft(ssk, parent_sock);
+
 	subflow->map_seq = READ_ONCE(msk->ack_seq);
-	WRITE_ONCE(msk->allow_infinite_fallback, false);
+
 out:
 	mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
 	return true;
@@ -3566,7 +3550,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
 		 * This is needed so NOSPACE flag can be set from tcp stack.
 		 */
-		mptcp_flush_join_list(msk);
 		mptcp_for_each_subflow(msk, subflow) {
 			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3e7e541a013f..4a140aec68b3 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -120,7 +120,7 @@
 #define MPTCP_CLEAN_UNA		7
 #define MPTCP_ERROR_REPORT	8
 #define MPTCP_RETRANSMIT	9
-#define MPTCP_WORK_SYNC_SETSOCKOPT 10
+#define MPTCP_FLUSH_JOIN_LIST	10
 #define MPTCP_CONNECTED		11
 
 static inline bool before64(__u64 seq1, __u64 seq2)
@@ -255,7 +255,6 @@ struct mptcp_sock {
 	u8		recvmsg_inq:1,
 			cork:1,
 			nodelay:1;
-	spinlock_t	join_list_lock;
 	struct work_struct work;
 	struct sk_buff  *ooo_last_skb;
 	struct rb_root  out_of_order_queue;
@@ -505,15 +504,6 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
 	return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
 }
 
-static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
-					     struct mptcp_subflow_context *subflow)
-{
-	sock_hold(mptcp_subflow_tcp_sock(subflow));
-	spin_lock_bh(&msk->join_list_lock);
-	list_add_tail(&subflow->node, &msk->join_list);
-	spin_unlock_bh(&msk->join_list_lock);
-}
-
 void mptcp_subflow_process_delegated(struct sock *ssk);
 
 static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
@@ -678,7 +668,6 @@ void __mptcp_data_acked(struct sock *sk);
 void __mptcp_error_report(struct sock *sk);
 void mptcp_subflow_eof(struct sock *sk);
 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
-void __mptcp_flush_join_list(struct mptcp_sock *msk);
 static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
 {
 	return READ_ONCE(msk->snd_data_fin_enable) &&
@@ -838,7 +827,7 @@ unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
 
 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
-void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
 
 static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
 {
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 3c3db22fd36a..962cfe3c463e 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1286,27 +1286,15 @@ void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
 	}
 }
 
-void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
 {
-	struct mptcp_subflow_context *subflow;
-	struct sock *sk = (struct sock *)msk;
-	u32 seq;
-
-	seq = sockopt_seq_reset(sk);
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 
-	mptcp_for_each_subflow(msk, subflow) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		u32 sseq = READ_ONCE(subflow->setsockopt_seq);
+	msk_owned_by_me(msk);
 
-		if (sseq != msk->setsockopt_seq) {
-			__mptcp_sockopt_sync(msk, ssk);
-			WRITE_ONCE(subflow->setsockopt_seq, seq);
-		} else if (sseq != seq) {
-			WRITE_ONCE(subflow->setsockopt_seq, seq);
-		}
+	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
+		sync_socket_options(msk, ssk);
 
-		cond_resched();
+		subflow->setsockopt_seq = msk->setsockopt_seq;
 	}
-
-	msk->setsockopt_seq = seq;
 }
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 76556743e952..536a322a6fd0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1448,7 +1448,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 	subflow->start_stamp = tcp_jiffies32;
 	mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
 
-	mptcp_add_pending_subflow(msk, subflow);
+	sock_hold(ssk);
+	list_add_tail(&subflow->node, &msk->conn_list);
 	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
 	if (err && err != -EINPROGRESS)
 		goto failed_unlink;
@@ -1460,9 +1461,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 	return err;
 
 failed_unlink:
-	spin_lock_bh(&msk->join_list_lock);
 	list_del(&subflow->node);
-	spin_unlock_bh(&msk->join_list_lock);
 	mptcp_pm_subflow_check_next(msk, ssk, subflow);
 	sock_put(mptcp_subflow_tcp_sock(subflow));
 
-- 
2.33.1



* [PATCH mptcp-next 2/2] mptcp: avoid atomic bit manipulation when possible
From: Paolo Abeni @ 2021-12-06 17:34 UTC
  To: mptcp

Currently the msk->flags bitmask carries both state for
mptcp_release_cb() - mostly touched under the mptcp data lock
- and other state info touched even outside that lock's scope.

As a consequence, msk->flags is always manipulated with
atomic operations.

This change splits the bitmask into two separate fields, so
that we can use plain bit operations when touching the
cb-related info.

The MPTCP_PUSH_PENDING bit needs additional care, as it is the
only CB-related flag currently accessed either under the mptcp
data lock or under the mptcp socket lock.
Let's add another mask just for that bit's sake.
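
In short (a condensed sketch for illustration only, not a hunk
from this patch): bits in the new cb_flags field are only set
with the mptcp data lock held, so the plain helpers suffice,
while the bits left in msk->flags keep the atomic helpers:

	/* cb-related state: serialized by the mptcp data lock */
	mptcp_data_lock(sk);
	__set_bit(MPTCP_ERROR_REPORT, &msk->cb_flags);
	mptcp_data_unlock(sk);

	/* state touched outside such lock scope: still atomic */
	set_bit(MPTCP_WORK_RTX, &msk->flags);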

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 46 +++++++++++++++++++++++---------------------
 net/mptcp/protocol.h | 18 ++++++++++-------
 net/mptcp/subflow.c  |  4 ++--
 3 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 38651c80b321..42c382d0eb01 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -763,7 +763,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 		if (!sock_owned_by_user(sk))
 			__mptcp_error_report(sk);
 		else
-			set_bit(MPTCP_ERROR_REPORT,  &msk->flags);
+			__set_bit(MPTCP_ERROR_REPORT,  &msk->cb_flags);
 	}
 
 	/* If the moves have caught up with the DATA_FIN sequence number
@@ -1529,9 +1529,8 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
 
 void mptcp_check_and_set_pending(struct sock *sk)
 {
-	if (mptcp_send_head(sk) &&
-	    !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
-		set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+	if (mptcp_send_head(sk))
+		mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
 }
 
 void __mptcp_push_pending(struct sock *sk, unsigned int flags)
@@ -2146,7 +2145,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
 			mptcp_schedule_work(sk);
 	} else {
 		/* delegate our work to tcp_release_cb() */
-		set_bit(MPTCP_RETRANSMIT, &msk->flags);
+		__set_bit(MPTCP_RETRANSMIT, &msk->cb_flags);
 	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -2859,7 +2858,9 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 
 	mptcp_destroy_common(msk);
 	msk->last_snd = NULL;
-	msk->flags = 0;
+	WRITE_ONCE(msk->flags, 0);
+	msk->cb_flags = 0;
+	msk->push_pending = 0;
 	msk->recovery = false;
 	msk->can_ack = false;
 	msk->fully_established = false;
@@ -3040,7 +3041,7 @@ void __mptcp_data_acked(struct sock *sk)
 	if (!sock_owned_by_user(sk))
 		__mptcp_clean_una(sk);
 	else
-		set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags);
+		__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
 
 	if (mptcp_pending_data_fin_ack(sk))
 		mptcp_schedule_work(sk);
@@ -3059,22 +3060,22 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 		else if (xmit_ssk)
 			mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
 	} else {
-		set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+		__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
 	}
 }
 
+#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
+				      BIT(MPTCP_RETRANSMIT) | \
+				      BIT(MPTCP_FLUSH_JOIN_LIST))
+
 /* processes deferred events and flush wmem */
 static void mptcp_release_cb(struct sock *sk)
 {
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
 	for (;;) {
-		unsigned long flags = 0;
-
-		if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
-			flags |= BIT(MPTCP_PUSH_PENDING);
-		if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
-			flags |= BIT(MPTCP_RETRANSMIT);
-		if (test_and_clear_bit(MPTCP_FLUSH_JOIN_LIST, &mptcp_sk(sk)->flags))
-			flags |= BIT(MPTCP_FLUSH_JOIN_LIST);
+		unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
+				      msk->push_pending;
 		if (!flags)
 			break;
 
@@ -3085,7 +3086,8 @@ static void mptcp_release_cb(struct sock *sk)
 		 *    datapath acquires the msk socket spinlock while holding
 		 *    the subflow socket lock
 		 */
-
+		msk->push_pending = 0;
+		msk->cb_flags &= ~flags;
 		spin_unlock_bh(&sk->sk_lock.slock);
 		if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
 			__mptcp_flush_join_list(sk);
@@ -3101,11 +3103,11 @@ static void mptcp_release_cb(struct sock *sk)
 	/* be sure to set the current sk state before taking actions
 	 * depending on sk_state
 	 */
-	if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags))
+	if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
 		__mptcp_set_connected(sk);
-	if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
+	if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
 		__mptcp_clean_una_wakeup(sk);
-	if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
+	if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
 		__mptcp_error_report(sk);
 
 	__mptcp_update_rmem(sk);
@@ -3147,7 +3149,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk)
 		if (!sock_owned_by_user(sk))
 			__mptcp_subflow_push_pending(sk, ssk);
 		else
-			set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+			__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
 		mptcp_data_unlock(sk);
 		mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
 	}
@@ -3267,7 +3269,7 @@ bool mptcp_finish_join(struct sock *ssk)
 	} else {
 		sock_hold(ssk);
 		list_add_tail(&subflow->node, &msk->join_list);
-		set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->flags);
+		__set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->cb_flags);
 	}
 	mptcp_data_unlock(parent);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 4a140aec68b3..fca045f1936f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -110,18 +110,20 @@
 /* MPTCP TCPRST flags */
 #define MPTCP_RST_TRANSIENT	BIT(0)
 
-/* MPTCP socket flags */
+/* MPTCP socket atomic flags */
 #define MPTCP_NOSPACE		1
 #define MPTCP_WORK_RTX		2
 #define MPTCP_WORK_EOF		3
 #define MPTCP_FALLBACK_DONE	4
 #define MPTCP_WORK_CLOSE_SUBFLOW 5
-#define MPTCP_PUSH_PENDING	6
-#define MPTCP_CLEAN_UNA		7
-#define MPTCP_ERROR_REPORT	8
-#define MPTCP_RETRANSMIT	9
-#define MPTCP_FLUSH_JOIN_LIST	10
-#define MPTCP_CONNECTED		11
+
+/* MPTCP socket release cb flags */
+#define MPTCP_PUSH_PENDING	1
+#define MPTCP_CLEAN_UNA		2
+#define MPTCP_ERROR_REPORT	3
+#define MPTCP_RETRANSMIT	4
+#define MPTCP_FLUSH_JOIN_LIST	5
+#define MPTCP_CONNECTED		6
 
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
@@ -243,6 +245,8 @@ struct mptcp_sock {
 	u32		token;
 	int		rmem_released;
 	unsigned long	flags;
+	unsigned long	cb_flags;
+	unsigned long	push_pending;
 	bool		recovery;		/* closing subflow write queue reinjected */
 	bool		can_ack;
 	bool		fully_established;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 536a322a6fd0..09225b57c7f6 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -388,7 +388,7 @@ static void mptcp_set_connected(struct sock *sk)
 	if (!sock_owned_by_user(sk))
 		__mptcp_set_connected(sk);
 	else
-		set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags);
+		__set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags);
 	mptcp_data_unlock(sk);
 }
 
@@ -1280,7 +1280,7 @@ static void subflow_error_report(struct sock *ssk)
 	if (!sock_owned_by_user(sk))
 		__mptcp_error_report(sk);
 	else
-		set_bit(MPTCP_ERROR_REPORT,  &mptcp_sk(sk)->flags);
+		__set_bit(MPTCP_ERROR_REPORT,  &mptcp_sk(sk)->cb_flags);
 	mptcp_data_unlock(sk);
 }
 
-- 
2.33.1



* Re: [PATCH mptcp-next 0/2] mptcp: a couple of cleanups
From: Mat Martineau @ 2021-12-09  1:03 UTC
  To: Paolo Abeni; +Cc: mptcp

On Mon, 6 Dec 2021, Paolo Abeni wrote:

> The first patch was already shared as an RFC; this iteration
> addresses a couple of bugs found there.
>
> The second patch removes a bunch of conditionals and atomic
> operations from the fast path. The performance impact is below
> noise level, but I still think it's worthwhile: the unneeded
> atomic operations are confusing.
>
> Paolo Abeni (2):
>  mptcp: cleanup MPJ subflow list handling
>  mptcp: avoid atomic bit manipulation when possible
>
> net/mptcp/pm_netlink.c |   3 -
> net/mptcp/protocol.c   | 149 ++++++++++++++++++-----------------------
> net/mptcp/protocol.h   |  31 ++++-----
> net/mptcp/sockopt.c    |  24 ++-----
> net/mptcp/subflow.c    |   9 ++-
> 5 files changed, 89 insertions(+), 127 deletions(-)

Looks good to me:

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>

Matthieu, note that this patch set depends on the "mptcp: improve subflow 
creation on errors" patches.

--
Mat Martineau
Intel


* Re: [PATCH mptcp-next 0/2] mptcp: a couple of cleanups
From: Matthieu Baerts @ 2023-03-07 17:41 UTC
  To: Paolo Abeni, mptcp

Hi Paolo,

On 06/03/2023 19:30, Paolo Abeni wrote:
> After the recent fixes we have both a good chance and some need
> to simplify subflow_syn_recv_sock().
> 
> A couple of patches in that direction, also addressing a
> long-standing feature issue.

Thank you for the patches!

Now in our tree (feat. for net-next):

New patches for t/upstream:
- a8d0495b5733: mptcp: avoid unneeded address copy
- 7526f37142a5: mptcp: simplify subflow_syn_recv_sock()
- Results: c26d7d0e4343..6f879388dfd8 (export)

Tests are now in progress:

https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20230307T173437

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net


* Re: [PATCH mptcp-next 0/2] mptcp: a couple of cleanups
From: Matthieu Baerts @ 2023-03-07 16:28 UTC
  To: Paolo Abeni, mptcp

Hi Paolo,

On 06/03/2023 19:30, Paolo Abeni wrote:
> After the recent fixes we have both a good chance and some need
> to simplify subflow_syn_recv_sock().
> 
> A couple of patches in that direction, also addressing a
> long-standing feature issue.

Thank you for these patches!

I just have one question about the first patch but for the rest, it
looks good to me!

Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net


* [PATCH mptcp-next 0/2] mptcp: a couple of cleanups
From: Paolo Abeni @ 2023-03-06 18:30 UTC
  To: mptcp

After the recent fixes we have both a good chance and some need
to simplify subflow_syn_recv_sock().

A couple of patches in that direction, also addressing a
long-standing feature issue.

Paolo Abeni (2):
  mptcp: avoid unneeded address copy
  mptcp: simplify subflow_syn_recv_sock()

 net/mptcp/subflow.c | 40 ++++++++++------------------------------
 1 file changed, 10 insertions(+), 30 deletions(-)

-- 
2.39.2


