From: Florian Westphal <fw@strlen.de>
To: <mptcp@lists.linux.dev>
Cc: Florian Westphal <fw@strlen.de>
Subject: [PATCH mptcp-next v2 02/10] mptcp: tag sequence_seq with socket state
Date: Tue, 13 Apr 2021 17:54:39 +0200 [thread overview]
Message-ID: <20210413155447.14043-3-fw@strlen.de> (raw)
In-Reply-To: <20210413155447.14043-1-fw@strlen.de>
Paolo Abeni suggested to avoid re-syncing new subflows because
they inherit options from listener. In case options were set on
listener but are not set on mptcp-socket there is no need to
do any synchronisation for new subflows.
This change sets sockopt_seq of new mptcp sockets to the seq of
the mptcp listener sock.
Subflow sequence is set to the embedded tcp listener sk.
Add a comment explaing why sk_state is involved in sockopt_seq
generation.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
net/mptcp/protocol.c | 12 ++++++++---
net/mptcp/protocol.h | 4 ++++
net/mptcp/sockopt.c | 47 ++++++++++++++++++++++++++++++++++++++++++--
net/mptcp/subflow.c | 4 ++++
4 files changed, 62 insertions(+), 5 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b8036d508c46..ffc9dbb4c07e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -724,18 +724,23 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
+ bool ret = false;
if (likely(list_empty(&msk->join_list)))
return false;
spin_lock_bh(&msk->join_list_lock);
- list_for_each_entry(subflow, &msk->join_list, node)
- mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
+ list_for_each_entry(subflow, &msk->join_list, node) {
+ u32 sseq = READ_ONCE(subflow->setsockopt_seq);
+ mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
+ if (READ_ONCE(msk->setsockopt_seq) != sseq)
+ ret = true;
+ }
list_splice_tail_init(&msk->join_list, &msk->conn_list);
spin_unlock_bh(&msk->join_list_lock);
- return true;
+ return ret;
}
void __mptcp_flush_join_list(struct mptcp_sock *msk)
@@ -2710,6 +2715,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
if (mp_opt->mp_capable) {
msk->can_ack = true;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 55fe05a40216..edc0128730df 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -256,6 +256,8 @@ struct mptcp_sock {
u64 time; /* start time of measurement window */
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
+
+ u32 setsockopt_seq;
};
#define mptcp_lock_sock(___sk, cb) do { \
@@ -414,6 +416,8 @@ struct mptcp_subflow_context {
long delegated_status;
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
+ u32 setsockopt_seq;
+
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
const struct inet_connection_sock_af_ops *icsk_af_ops;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 4fdc0ad6acf7..27b49543fc58 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -24,6 +24,27 @@ static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
return msk->first;
}
+static u32 sockopt_seq_reset(const struct sock *sk)
+{
+ sock_owned_by_me(sk);
+
+ /* Highbits contain state. Allows to distinguish sockopt_seq
+ * of listener and established:
+ * s0 = new_listener()
+ * sockopt(s0) - seq is 1
+ * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
+ * sockopt(s0) - seq increments to 2 on s0
+ * sockopt(s1) // seq increments to 2 on s1 (different option)
+ * new ssk completes join, inherits options from s0 // seq 2
+ * Needs sync from mptcp join logic, but ssk->seq == msk->seq
+ *
+ * Set High order bits to sk_state so ssk->seq == msk->seq test
+ * will fail.
+ */
+
+ return (u32)sk->sk_state << 24u;
+}
+
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -350,22 +371,44 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
return -EOPNOTSUPP;
}
+static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
+{
+}
+
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
msk_owned_by_me(msk);
+
+ if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
+ __mptcp_sockopt_sync(msk, ssk);
+
+ subflow->setsockopt_seq = msk->setsockopt_seq;
+ }
}
void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ u32 seq;
- msk_owned_by_me(msk);
+ seq = sockopt_seq_reset(sk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ u32 sseq = READ_ONCE(subflow->setsockopt_seq);
- mptcp_sockopt_sync(msk, ssk);
+ if (sseq != msk->setsockopt_seq) {
+ __mptcp_sockopt_sync(msk, ssk);
+ WRITE_ONCE(subflow->setsockopt_seq, seq);
+ } else if (sseq != seq) {
+ WRITE_ONCE(subflow->setsockopt_seq, seq);
+ }
cond_resched();
}
+
+ msk->setsockopt_seq = seq;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fc7107f8d81b..82e91b00ad39 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -681,6 +681,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
goto out;
}
+ /* ssk inherits options of listener sk */
+ ctx->setsockopt_seq = listener->setsockopt_seq;
+
if (ctx->mp_capable) {
/* this can't race with mptcp_close(), as the msk is
* not yet exposted to user-space
@@ -696,6 +699,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* created mptcp socket
*/
new_msk->sk_destruct = mptcp_sock_destruct;
+ mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
--
2.26.3
next prev parent reply other threads:[~2021-04-13 15:55 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-13 15:54 [PATCH mptcp-next v2 00/10] mptcp: add SOL_SOCKET support Florian Westphal
2021-04-13 15:54 ` [PATCH mptcp-next v2 01/10] mptcp: add skeleton to sync msk socket options to subflows Florian Westphal
2021-04-13 15:54 ` Florian Westphal [this message]
2021-04-13 15:54 ` [PATCH mptcp-next v2 03/10] mptcp: setsockopt: handle SO_KEEPALIVE and SO_PRIORITY Florian Westphal
2021-04-13 15:54 ` [PATCH mptcp-next v2 04/10] mptcp: setsockopt: handle receive/send buffer and device bind Florian Westphal
2021-04-13 15:54 ` [PATCH mptcp-next v2 05/10] mptcp: setsockopt: support SO_LINGER Florian Westphal
2021-04-13 15:54 ` [PATCH mptcp-next v2 06/10] mptcp: setsockopt: add SO_MARK support Florian Westphal
2021-04-13 15:54 ` [PATCH mptcp-next v2 07/10] mptcp: setsockopt: add SO_INCOMING_CPU Florian Westphal
2021-04-13 15:54 ` [PATCH mptcp-next v2 08/10] mptcp: setsockopt: SO_DEBUG and no-op options Florian Westphal
2021-04-13 15:54 ` [PATCH mptcp-next v2 09/10] mptcp: sockopt: add TCP_CONGESTION and TCP_INFO Florian Westphal
2021-04-13 15:54 ` [PATCH mptcp-next v2 10/10] selftests: mptcp: add packet mark test case Florian Westphal
2021-04-13 16:30 ` [PATCH mptcp-next v2 00/10] mptcp: add SOL_SOCKET support Paolo Abeni
2021-04-14 16:07 ` Matthieu Baerts
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210413155447.14043-3-fw@strlen.de \
--to=fw@strlen.de \
--cc=mptcp@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).