All of lore.kernel.org
 help / color / mirror / Atom feed
From: Florian Westphal <fw@strlen.de>
To: <mptcp@lists.linux.dev>
Cc: Florian Westphal <fw@strlen.de>
Subject: [PATCH mptcp 02/10] mptcp: tag sequence_seq with socket state
Date: Thu,  8 Apr 2021 20:49:28 +0200	[thread overview]
Message-ID: <20210408184936.6245-3-fw@strlen.de> (raw)
In-Reply-To: <20210408184936.6245-1-fw@strlen.de>

Paolo Abeni suggested to avoid re-syncing new subflows because
they inherit options from listener. In case options were set on
listener but are not set on mptcp-socket there is no need to
do any synchronisation for new subflows.

This change sets sockopt_seq of new mptcp sockets to the seq of
the mptcp listener sock.

Subflow sequence is set to the embedded tcp listener sk.
Add a comment explaing why sk_state is involved in sockopt_seq
generation.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/protocol.c | 16 ++++++++++-----
 net/mptcp/protocol.h |  4 ++++
 net/mptcp/sockopt.c  | 47 ++++++++++++++++++++++++++++++++++++++++++--
 net/mptcp/subflow.c  |  4 ++++
 4 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2233baf03c3a..26e3ffe8840e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -729,11 +729,14 @@ static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
 	if (likely(list_empty(&msk->join_list)))
 		return false;
 
-	ret = true;
 	spin_lock_bh(&msk->join_list_lock);
-	list_for_each_entry(subflow, &msk->join_list, node)
-		mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
+	list_for_each_entry(subflow, &msk->join_list, node) {
+		u32 sseq = READ_ONCE(subflow->setsockopt_seq);
 
+		mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
+		if (READ_ONCE(msk->setsockopt_seq) != sseq)
+			ret = true;
+	}
 	list_splice_tail_init(&msk->join_list, &msk->conn_list);
 	spin_unlock_bh(&msk->join_list_lock);
 
@@ -745,7 +748,8 @@ void __mptcp_flush_join_list(struct mptcp_sock *msk)
 	if (likely(!mptcp_do_flush_join_list(msk)))
 		return;
 
-	if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
+	if (msk->setsockopt_seq &&
+	    !test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
 		mptcp_schedule_work((struct sock *)msk);
 }
 
@@ -758,7 +762,8 @@ static void mptcp_flush_join_list(struct mptcp_sock *msk)
 	if (!mptcp_do_flush_join_list(msk) && !sync_needed)
 		return;
 
-	mptcp_sockopt_sync_all(msk);
+	if (msk->setsockopt_seq)
+		mptcp_sockopt_sync_all(msk);
 }
 
 static bool mptcp_timer_pending(struct sock *sk)
@@ -2712,6 +2717,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	msk->snd_nxt = msk->write_seq;
 	msk->snd_una = msk->write_seq;
 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
 
 	if (mp_opt->mp_capable) {
 		msk->can_ack = true;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 55fe05a40216..edc0128730df 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -256,6 +256,8 @@ struct mptcp_sock {
 		u64	time;	/* start time of measurement window */
 		u64	rtt_us; /* last maximum rtt of subflows */
 	} rcvq_space;
+
+	u32 setsockopt_seq;
 };
 
 #define mptcp_lock_sock(___sk, cb) do {					\
@@ -414,6 +416,8 @@ struct mptcp_subflow_context {
 	long	delegated_status;
 	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */
 
+	u32 setsockopt_seq;
+
 	struct	sock *tcp_sock;	    /* tcp sk backpointer */
 	struct	sock *conn;	    /* parent mptcp_sock */
 	const	struct inet_connection_sock_af_ops *icsk_af_ops;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 4fdc0ad6acf7..b1950be15abe 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -24,6 +24,27 @@ static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
 	return msk->first;
 }
 
+static u32 sockopt_seq_reset(const struct sock *sk)
+{
+	sock_owned_by_me(sk);
+
+	/* Highbits contain state.  Allows to distinguish sockopt_seq
+	 * of listener and established:
+	 * s0 = new_listener()
+	 * sockopt(s0) - seq is 1
+	 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
+	 * sockopt(s0) - seq increments to 2 on s0
+	 * sockopt(s1) // seq increments to 2 on s1 (different option)
+	 * new ssk completes join, inherits options from s0 // seq 2
+	 * Needs sync from mptcp join logic, but ssk->seq == msk->seq
+	 *
+	 * Set High order bits to sk_state so ssk->seq == msk->seq test
+	 * will fail.
+	 */
+
+	return sk->sk_state << 24u;
+}
+
 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 				       sockptr_t optval, unsigned int optlen)
 {
@@ -350,22 +371,44 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
 	return -EOPNOTSUPP;
 }
 
+static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
+{
+}
+
 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
 {
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
 	msk_owned_by_me(msk);
+
+	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
+		__mptcp_sockopt_sync(msk, ssk);
+
+		subflow->setsockopt_seq = msk->setsockopt_seq;
+	}
 }
 
 void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	u32 seq;
 
-	msk_owned_by_me(msk);
+	seq = sockopt_seq_reset(sk);
 
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		u32 sseq = READ_ONCE(subflow->setsockopt_seq);
 
-		mptcp_sockopt_sync(msk, ssk);
+		if (sseq != msk->setsockopt_seq) {
+			__mptcp_sockopt_sync(msk, ssk);
+			WRITE_ONCE(subflow->setsockopt_seq, seq);
+		} else if (sseq != seq) {
+			WRITE_ONCE(subflow->setsockopt_seq, seq);
+		}
 
 		cond_resched();
 	}
+
+	msk->setsockopt_seq = seq;
 }
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fc7107f8d81b..82e91b00ad39 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -681,6 +681,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			goto out;
 		}
 
+		/* ssk inherits options of listener sk */
+		ctx->setsockopt_seq = listener->setsockopt_seq;
+
 		if (ctx->mp_capable) {
 			/* this can't race with mptcp_close(), as the msk is
 			 * not yet exposted to user-space
@@ -696,6 +699,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			 * created mptcp socket
 			 */
 			new_msk->sk_destruct = mptcp_sock_destruct;
+			mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
 			mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
 			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
 			ctx->conn = new_msk;
-- 
2.26.3


  parent reply	other threads:[~2021-04-08 18:49 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-08 18:49 [PATCH mptcp 00/10] mptcp: add SOL_SOCKET support Florian Westphal
2021-04-08 18:49 ` [PATCH mptcp 01/10] mptcp: add skeleton to sync msk socket options to subflows Florian Westphal
2021-04-08 18:49 ` Florian Westphal [this message]
2021-04-10  0:12   ` [PATCH mptcp 02/10] mptcp: tag sequence_seq with socket state Mat Martineau
2021-04-12  8:05     ` Florian Westphal
2021-04-13  9:58   ` Paolo Abeni
2021-04-13 14:58     ` Florian Westphal
2021-04-08 18:49 ` [PATCH mptcp 03/10] mptcp: setsockopt: handle SO_KEEPALIVE and SO_PRIORITY Florian Westphal
2021-04-13 10:10   ` Paolo Abeni
2021-04-08 18:49 ` [PATCH mptcp 04/10] mptcp: setsockopt: handle receive/send buffer and device bind Florian Westphal
2021-04-08 18:49 ` [PATCH mptcp 05/10] mptcp: setsockopt: support SO_LINGER Florian Westphal
2021-04-08 18:49 ` [PATCH mptcp 06/10] mptcp: setsockopt: add SO_MARK support Florian Westphal
2021-04-08 18:49 ` [PATCH mptcp 07/10] mptcp: setsockopt: add SO_INCOMING_CPU Florian Westphal
2021-04-08 18:49 ` [PATCH mptcp 08/10] mptcp: setsockopt: SO_DEBUG and no-op options Florian Westphal
2021-04-08 18:49 ` [PATCH mptcp 09/10] mptcp: sockopt: add TCP_CONGESTION and TCP_INFO Florian Westphal
2021-04-08 18:49 ` [PATCH mptcp 10/10] selftests: mptcp: add packet mark test case Florian Westphal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210408184936.6245-3-fw@strlen.de \
    --to=fw@strlen.de \
    --cc=mptcp@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.