All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH mptcp-next v2 0/5] mptcp: replace per-addr listener sockets
@ 2022-02-17 14:25 Florian Westphal
  2022-02-17 14:25 ` [PATCH mptcp-next v2 1/5] mptcp: check netns in mptcp_token_exists Florian Westphal
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Florian Westphal @ 2022-02-17 14:25 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Changes since v1:
- avoid uneeded sk refcount inc/dec pair
- use more descriptive name for function name
- fix typo in rfc number (Mathieu)
- don't increment listner refcount, caller assumes noref

Not changed:
 - hook location, plain tcp sockets take precedence.
 - per-netns listener

Global listener means other namespaces are affected in case
of backlog overflow. Pernet listeners avoid that, so I don't see
a strong argument for tcp refactoring that would allow global sk.

This series introduces a hidden per-netns pseudo "listener" socket to
handle mptcp join requests with a valid token but that do not match an
existing listening socket.

First patch extens existing helper to also restrict search to the
given netns.

Second patch is a minor preparation patch: MPTCP Join requests packets
that fail to find a suitable socket by means of standard address/port
demultiplexing will be steered to a pseudo-listener, similar to TPROXY
interception.

This pseudo listener isn't bound to an address or port (all zero), so we
need to fetch the port number from the tcp header and not the listener sk.

Patch three adds a stub to the tcp demux code.
This has no functionality, its extra to make tcp datapath change stand out.

Fourth patch is the bulk work, it adds per netns listener and implements
token-based socket demultiplexing.

Last patch zaps the per-address sockets from mptcp, they are not needed
anymore.

Florian Westphal (5):
  mptcp: check netns in mptcp_token_exists
  mptcp: prefer ip address in syn skb instead of listen sk bound address
  tcp: add mptcp join demultiplex hooks
  mptcp: handle join requests via pernet listen socket
  mptcp: remove per-address listening sockets

 include/net/mptcp.h    |  15 +++
 net/ipv4/tcp_ipv4.c    |   4 +
 net/ipv6/tcp_ipv6.c    |  23 +++--
 net/mptcp/ctrl.c       | 211 ++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/pm_netlink.c |  82 +++-------------
 net/mptcp/protocol.c   |   2 +-
 net/mptcp/protocol.h   |   6 +-
 net/mptcp/subflow.c    |  12 ++-
 net/mptcp/token.c      |   5 +-
 9 files changed, 272 insertions(+), 88 deletions(-)
-- 
2.34.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH mptcp-next v2 1/5] mptcp: check netns in mptcp_token_exists
  2022-02-17 14:25 [PATCH mptcp-next v2 0/5] mptcp: replace per-addr listener sockets Florian Westphal
@ 2022-02-17 14:25 ` Florian Westphal
  2022-02-17 14:25 ` [PATCH mptcp-next v2 2/5] mptcp: prefer ip address in syn skb instead of listen sk bound address Florian Westphal
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Florian Westphal @ 2022-02-17 14:25 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

This will be used by a followup patch to check if the token
specified in mp_join option exists in the current netns.

At this time, tokens are unique across all namespaces, but
we need to treat a token that exists in netns x as 'does not exist'
when doing existence check from netns y.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/protocol.h | 2 +-
 net/mptcp/subflow.c  | 4 +++-
 net/mptcp/token.c    | 5 +++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 18ca0248c084..c43ca46dbc27 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -725,7 +725,7 @@ void mptcp_token_destroy_request(struct request_sock *req);
 int mptcp_token_new_connect(struct sock *sk);
 void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
 			struct mptcp_sock *msk);
-bool mptcp_token_exists(u32 token);
+bool mptcp_token_exists(const struct net *net, u32 token);
 struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
 struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
 					 long *s_num);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e727d838da0e..be43077fe76e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -176,10 +176,12 @@ static int subflow_check_req(struct request_sock *req,
 		} while (subflow_req->local_key == 0);
 
 		if (unlikely(req->syncookie)) {
+			const struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
+
 			mptcp_crypto_key_sha(subflow_req->local_key,
 					     &subflow_req->token,
 					     &subflow_req->idsn);
-			if (mptcp_token_exists(subflow_req->token)) {
+			if (mptcp_token_exists(net, subflow_req->token)) {
 				if (retries-- > 0)
 					goto again;
 				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index f52ee7b26aed..0593c83385e0 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -203,7 +203,7 @@ void mptcp_token_accept(struct mptcp_subflow_request_sock *req,
 	spin_unlock_bh(&bucket->lock);
 }
 
-bool mptcp_token_exists(u32 token)
+bool mptcp_token_exists(const struct net *net, u32 token)
 {
 	struct hlist_nulls_node *pos;
 	struct token_bucket *bucket;
@@ -216,7 +216,8 @@ bool mptcp_token_exists(u32 token)
 again:
 	sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
 		msk = mptcp_sk(sk);
-		if (READ_ONCE(msk->token) == token)
+		if (READ_ONCE(msk->token) == token &&
+		    net_eq(sock_net(sk), net))
 			goto found;
 	}
 	if (get_nulls_value(pos) != (token & token_mask))
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH mptcp-next v2 2/5] mptcp: prefer ip address in syn skb instead of listen sk bound address
  2022-02-17 14:25 [PATCH mptcp-next v2 0/5] mptcp: replace per-addr listener sockets Florian Westphal
  2022-02-17 14:25 ` [PATCH mptcp-next v2 1/5] mptcp: check netns in mptcp_token_exists Florian Westphal
@ 2022-02-17 14:25 ` Florian Westphal
  2022-02-17 14:25 ` [PATCH mptcp-next v2 3/5] tcp: add mptcp join demultiplex hooks Florian Westphal
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Florian Westphal @ 2022-02-17 14:25 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Once we change mptcp to use tproxy-like scheme to steer mptcp join
requests to a special pernet socket, the 'sk bound address' becomes
meaningless because it will never be identical to the tcp dport/ip daddr
of the on-wire packet.

Prepare for this: pass the skbuff and use the packet data instead of
the address the listener socket is bound to.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/pm_netlink.c | 17 +++++++++++++++--
 net/mptcp/protocol.h   |  2 +-
 net/mptcp/subflow.c    |  5 +++--
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 56f5603c10f2..614b5d05aa62 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -279,13 +279,26 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
 	return NULL;
 }
 
-bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
+static void skb_fetch_src_address(const struct sk_buff *skb,
+				  struct mptcp_addr_info *addr)
+{
+	addr->port = tcp_hdr(skb)->dest;
+	if (addr->family == AF_INET)
+		addr->addr.s_addr = ip_hdr(skb)->daddr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	else if (addr->family == AF_INET6)
+		addr->addr6 = ipv6_hdr(skb)->daddr;
+#endif
+}
+
+bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, int af, const struct sk_buff *skb)
 {
 	struct mptcp_pm_add_entry *entry;
 	struct mptcp_addr_info saddr;
 	bool ret = false;
 
-	local_address((struct sock_common *)sk, &saddr);
+	saddr.family = af;
+	skb_fetch_src_address(skb, &saddr);
 
 	spin_lock_bh(&msk->pm.lock);
 	list_for_each_entry(entry, &msk->pm.anno_list, list) {
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c43ca46dbc27..7bd064b68b51 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -760,7 +760,7 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
 void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
 void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
-bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
+bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, int af, const struct sk_buff *skb);
 struct mptcp_pm_add_entry *
 mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 		       const struct mptcp_addr_info *addr, bool check_id);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index be43077fe76e..8be20f7b76df 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -218,7 +218,8 @@ static int subflow_check_req(struct request_sock *req,
 			pr_debug("syn inet_sport=%d %d",
 				 ntohs(inet_sk(sk_listener)->inet_sport),
 				 ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
-			if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
+			if (!mptcp_pm_sport_in_anno_list(subflow_req->msk,
+							 sk_listener->sk_family, skb)) {
 				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
 				return -EPERM;
 			}
@@ -750,7 +751,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 				pr_debug("ack inet_sport=%d %d",
 					 ntohs(inet_sk(sk)->inet_sport),
 					 ntohs(inet_sk((struct sock *)owner)->inet_sport));
-				if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
+				if (!mptcp_pm_sport_in_anno_list(owner, sk->sk_family, skb)) {
 					SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
 					goto dispose_child;
 				}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH mptcp-next v2 3/5] tcp: add mptcp join demultiplex hooks
  2022-02-17 14:25 [PATCH mptcp-next v2 0/5] mptcp: replace per-addr listener sockets Florian Westphal
  2022-02-17 14:25 ` [PATCH mptcp-next v2 1/5] mptcp: check netns in mptcp_token_exists Florian Westphal
  2022-02-17 14:25 ` [PATCH mptcp-next v2 2/5] mptcp: prefer ip address in syn skb instead of listen sk bound address Florian Westphal
@ 2022-02-17 14:25 ` Florian Westphal
  2022-02-17 21:58   ` Paolo Abeni
  2022-02-17 14:25 ` [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket Florian Westphal
  2022-02-17 14:25 ` [PATCH mptcp-next v2 5/5] mptcp: remove per-address listening sockets Florian Westphal
  4 siblings, 1 reply; 16+ messages in thread
From: Florian Westphal @ 2022-02-17 14:25 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Split from the next patch to make core tcp changes more obvious:
add a dummy function that gets called after tcp socket demux came up
empty.

This will be used by mptcp to check if a tcp syn contains an mptcp
join option with a valid token (connection id).

If so, a hidden pernet mptcp listener socket is returned and packet
resumes normally.

This patch series does not cover timewait sockets so far.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/mptcp.h | 5 +++++
 net/ipv4/tcp_ipv4.c | 4 ++++
 net/ipv6/tcp_ipv6.c | 4 ++++
 3 files changed, 13 insertions(+)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 8b1afd6f5cc4..5ee422b56902 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -197,6 +197,10 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
 
 	return htonl(0u);
 }
+static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb)
+{
+	return NULL;
+}
 #else
 
 static inline void mptcp_init(void)
@@ -274,6 +278,7 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
 }
 
 static inline __be32 mptcp_reset_option(const struct sk_buff *skb)  { return htonl(0u); }
+static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb) { return NULL; }
 #endif /* CONFIG_MPTCP */
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6873f46fc8ba..6e6675a09443 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2140,6 +2140,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	sk = mptcp_handle_join(AF_INET, skb);
+	if (sk)
+		goto process;
+
 	tcp_v4_fill_cb(skb, iph, th);
 
 	if (tcp_checksum_complete(skb)) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0c648bf07f39..788040db8e9e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1782,6 +1782,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	sk = mptcp_handle_join(AF_INET6, skb);
+	if (sk)
+		goto process;
+
 	tcp_v6_fill_cb(skb, hdr, th);
 
 	if (tcp_checksum_complete(skb)) {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket
  2022-02-17 14:25 [PATCH mptcp-next v2 0/5] mptcp: replace per-addr listener sockets Florian Westphal
                   ` (2 preceding siblings ...)
  2022-02-17 14:25 ` [PATCH mptcp-next v2 3/5] tcp: add mptcp join demultiplex hooks Florian Westphal
@ 2022-02-17 14:25 ` Florian Westphal
  2022-02-17 19:46     ` kernel test robot
                     ` (3 more replies)
  2022-02-17 14:25 ` [PATCH mptcp-next v2 5/5] mptcp: remove per-address listening sockets Florian Westphal
  4 siblings, 4 replies; 16+ messages in thread
From: Florian Westphal @ 2022-02-17 14:25 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Currently mptcp adds kernel-based listener socket for all
netlink-configured mptcp address endpoints.

This has caveats because kernel may interfere with unrelated programs
that use same address/port pairs.

RFC 8684 says:
 Demultiplexing subflow SYNs MUST be done using the token; this is
 unlike traditional TCP, where the destination port is used for
 demultiplexing SYN packets.  Once a subflow is set up, demultiplexing
 packets is done using the 5-tuple, as in traditional TCP.

This patch deviates from this in that it retrains the existing checks of
verifying the incoming requests destination vs. the list of announced
addresses.

This can be relaxed later if deemed appropriate.

Furthermore, TCP-only listeners take precedence: An MPTCP peer
MUST NOT announce address:port pairs that are already in use by a
non-mptcp listener.
This could be changed, but it requires move of mptcp_handle_join()
hook *before* the tcp port demux, i.e. an additional conditional in
hotpath.  As-is, the additional conditional (syn && !rst && ...) is
in the 'no socket found' path.

The pernet 'listening' socket is hidden from userspace.
It is not part of any hashes and not bound to any address or port.

TPROXY-like semantics apply: If tcp demux cannot find a port for a given
packet, check if the packet is a syn packet with a valid join token.

If so, the pernet listener is returned and tcp processing resumes.
Otherwise, handling is intentical.

This patch does not cover timewait sockets.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/mptcp.h  |  10 ++
 net/ipv6/tcp_ipv6.c  |  19 ++--
 net/mptcp/ctrl.c     | 211 ++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/protocol.c |   2 +-
 net/mptcp/protocol.h |   2 +-
 net/mptcp/subflow.c  |   3 +
 6 files changed, 233 insertions(+), 14 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 5ee422b56902..49c188b978e1 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -189,6 +189,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
 				  struct sk_buff *skb);
 
 __be32 mptcp_get_reset_option(const struct sk_buff *skb);
+struct sock *__mptcp_handle_join(int af, struct sk_buff *skb);
 
 static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
 {
@@ -199,6 +200,11 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
 }
 static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb)
 {
+	const struct tcphdr *th = tcp_hdr(skb);
+
+	if (th->syn && !th->ack && !th->rst && !th->fin)
+		return __mptcp_handle_join(af, skb);
+
 	return NULL;
 }
 #else
@@ -283,9 +289,13 @@ static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb) { retu
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 int mptcpv6_init(void);
+int mptcpv6_init_net(struct net *net);
+void mptcpv6_exit_net(struct net *net);
 void mptcpv6_handle_mapped(struct sock *sk, bool mapped);
 #elif IS_ENABLED(CONFIG_IPV6)
 static inline int mptcpv6_init(void) { return 0; }
+static inline int mptcpv6_init_net(struct net *net) { return 0; }
+static inline void mptcpv6_exit_net(struct net *net) { }
 static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
 #endif
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 788040db8e9e..3b8608d35dcd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2233,13 +2233,22 @@ static struct inet_protosw tcpv6_protosw = {
 
 static int __net_init tcpv6_net_init(struct net *net)
 {
-	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
-				    SOCK_RAW, IPPROTO_TCP, net);
+	int err = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+				       SOCK_RAW, IPPROTO_TCP, net);
+	if (err)
+		return err;
+
+	err = mptcpv6_init_net(net);
+	if (err)
+		inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+
+	return err;
 }
 
 static void __net_exit tcpv6_net_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+	mptcpv6_exit_net(net);
 }
 
 static struct pernet_operations tcpv6_net_ops = {
@@ -2264,15 +2273,9 @@ int __init tcpv6_init(void)
 	if (ret)
 		goto out_tcpv6_protosw;
 
-	ret = mptcpv6_init();
-	if (ret)
-		goto out_tcpv6_pernet_subsys;
-
 out:
 	return ret;
 
-out_tcpv6_pernet_subsys:
-	unregister_pernet_subsys(&tcpv6_net_ops);
 out_tcpv6_protosw:
 	inet6_unregister_protosw(&tcpv6_protosw);
 out_tcpv6_protocol:
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..6358c803ba12 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -21,6 +21,12 @@ static int mptcp_pernet_id;
 static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
 #endif
 
+struct mptcp_join_sk {
+	struct sock *sk;
+	struct inet_bind_bucket *tb;
+	struct inet_bind_hashbucket head;
+};
+
 struct mptcp_pernet {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *ctl_table_hdr;
@@ -32,6 +38,18 @@ struct mptcp_pernet {
 	u8 checksum_enabled;
 	u8 allow_join_initial_addr_port;
 	u8 pm_type;
+
+	/* pernet listener to handle mptcp join requests
+	 * based on the mptcp token.
+	 *
+	 * Has to be pernet because tcp uses
+	 * sock_net(sk_listener) to obtain the net namespace for
+	 * the syn/ack route lookup.
+	 */
+	struct mptcp_join_sk join4;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	struct mptcp_join_sk join6;
+#endif
 };
 
 static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -185,13 +203,173 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
 
 #endif /* CONFIG_SYSCTL */
 
+struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
+{
+	struct mptcp_options_received mp_opt;
+	struct mptcp_pernet *pernet;
+	struct socket *ssock;
+	struct sock *lsk;
+	struct net *net;
+
+	/* paranoia check: don't allow 0 destination port,
+	 * else __inet_inherit_port will insert the child socket
+	 * into the phony hash slot of the pernet listener.
+	 */
+	if (tcp_hdr(skb)->dest == 0)
+		return NULL;
+
+	mptcp_get_options(skb, &mp_opt);
+
+	if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
+		return NULL;
+
+	net = dev_net(skb_dst(skb)->dev);
+	if (!mptcp_is_enabled(net))
+		return NULL;
+
+	/* RFC8684: If the token is unknown [..], the receiver will send
+	 * back a reset (RST) signal, analogous to an unknown port in TCP,
+	 * containing an MP_TCPRST option (Section 3.6) [..]
+	 */
+	if (!mptcp_token_exists(net, mp_opt.token)) {
+		struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
+
+		if (ext) {
+			memset(ext, 0, sizeof(*ext));
+			ext->reset_reason = MPTCP_RST_EMPTCP;
+		}
+		return NULL;
+	}
+
+	pernet = mptcp_get_pernet(net);
+
+	switch (af) {
+	case AF_INET:
+		lsk = pernet->join4.sk;
+		break;
+	case AF_INET6:
+		lsk = pernet->join6.sk;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return NULL;
+	}
+
+	ssock = __mptcp_nmpc_socket(mptcp_sk(lsk));
+	if (WARN_ON(!ssock))
+		return NULL;
+
+	return ssock->sk;
+}
+
+static struct socket *mptcp_create_join_listen_socket(struct net *net, int af)
+{
+	struct socket *s, *ssock;
+	int err;
+
+	err = sock_create_kern(net, af, SOCK_STREAM, IPPROTO_MPTCP, &s);
+	if (err)
+		return ERR_PTR(err);
+
+	ssock = __mptcp_nmpc_socket(mptcp_sk(s->sk));
+	if (!ssock) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	ssock->sk->sk_max_ack_backlog = SOMAXCONN;
+	inet_sk_state_store(ssock->sk, TCP_LISTEN);
+
+	s->sk->sk_max_ack_backlog = SOMAXCONN;
+	inet_sk_state_store(s->sk, TCP_LISTEN);
+
+	s->sk->sk_net_refcnt = 1;
+	get_net_track(net, &s->sk->ns_tracker, GFP_KERNEL);
+	sock_inuse_add(net, 1);
+
+	return s;
+out:
+	sock_release(s);
+	return ERR_PTR(err);
+}
+
+static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_join_sk *join_sk)
+{
+	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(sk));
+	struct inet_hashinfo *table = ssock->sk->sk_prot->h.hashinfo;
+	struct inet_bind_bucket *tb;
+
+	spin_lock_init(&join_sk->head.lock);
+	INIT_HLIST_HEAD(&join_sk->head.chain);
+
+	/* Our "listen socket" isn't bound to any address or port.
+	 * Conceptually, SYN packet with mptcp join request are steered to
+	 * this pernet socket just like TPROXY steals arbitrary connection
+	 * requests to assign them to listening socket with different
+	 * address or port.
+	 *
+	 * The bind_bucket is needed for sake of __inet_inherit_port(),
+	 * so it can place the new child socket in the correct
+	 * bind_bucket slot.
+	 *
+	 * A phony head is used to hide this socket from normal sk loookup.
+	 */
+	tb = inet_bind_bucket_create(table->bind_bucket_cachep,
+				     net, &join_sk->head, 0, 0);
+	if (!tb)
+		return -ENOMEM;
+
+	inet_csk(ssock->sk)->icsk_bind_hash = tb;
+	return 0;
+}
+
 static int __net_init mptcp_net_init(struct net *net)
 {
 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+	struct socket *sock;
+	int err;
 
 	mptcp_pernet_set_defaults(pernet);
 
-	return mptcp_pernet_new_table(net, pernet);
+	err = mptcp_pernet_new_table(net, pernet);
+	if (err)
+		return err;
+
+	sock = mptcp_create_join_listen_socket(net, AF_INET);
+	if (IS_ERR(sock)) {
+		err = PTR_ERR(sock);
+		goto out_table;
+	}
+
+	err = mptcp_init_join_sk(net, sock->sk, &pernet->join4);
+	if (err) {
+		sock_release(sock);
+		goto out_table;
+	}
+
+	/* struct sock is still reachable via sock->sk_socket backpointer */
+	pernet->join4.sk = sock->sk;
+	return err;
+
+out_table:
+	if (!net_eq(net, &init_net))
+		mptcp_pernet_del_table(pernet);
+	return err;
+}
+
+static void __net_exit mptcp_exit_join_sk(struct mptcp_join_sk *jsk)
+{
+	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(jsk->sk));
+	struct inet_bind_bucket *tb;
+	struct inet_hashinfo *table;
+
+	table = ssock->sk->sk_prot->h.hashinfo;
+
+	tb = inet_csk(ssock->sk)->icsk_bind_hash;
+	inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
+
+	ssock = jsk->sk->sk_socket;
+	sock_release(ssock);
 }
 
 /* Note: the callback will only be called per extra netns */
@@ -200,6 +378,7 @@ static void __net_exit mptcp_net_exit(struct net *net)
 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
 
 	mptcp_pernet_del_table(pernet);
+	mptcp_exit_join_sk(&pernet->join4);
 }
 
 static struct pernet_operations mptcp_pernet_ops = {
@@ -219,12 +398,36 @@ void __init mptcp_init(void)
 }
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-int __init mptcpv6_init(void)
+int __net_init mptcpv6_init_net(struct net *net)
 {
+	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+	struct socket *sock;
 	int err;
 
-	err = mptcp_proto_v6_init();
+	if (net_eq(net, &init_net)) {
+		err = mptcp_proto_v6_init();
+		if (err)
+			return err;
+	}
 
-	return err;
+	sock = mptcp_create_join_listen_socket(net, AF_INET6);
+	if (IS_ERR(sock))
+		return PTR_ERR(sock);
+
+	err = mptcp_init_join_sk(net, sock->sk, &pernet->join6);
+	if (err) {
+		sock_release(sock);
+		return err;
+	}
+
+	pernet->join6.sk = sock->sk;
+	return 0;
+}
+
+void __net_exit mptcpv6_exit_net(struct net *net)
+{
+	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+
+	mptcp_exit_join_sk(&pernet->join6);
 }
 #endif
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4599bde215b2..5b54e3c8efea 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3777,7 +3777,7 @@ static struct inet_protosw mptcp_v6_protosw = {
 	.flags		= INET_PROTOSW_ICSK,
 };
 
-int __init mptcp_proto_v6_init(void)
+int __net_init mptcp_proto_v6_init(void)
 {
 	int err;
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7bd064b68b51..6a81e2a21301 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -647,7 +647,7 @@ static inline bool mptcp_has_another_subflow(struct sock *ssk)
 
 void __init mptcp_proto_init(void);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-int __init mptcp_proto_v6_init(void);
+int __net_init mptcp_proto_v6_init(void);
 #endif
 
 struct sock *mptcp_sk_clone(const struct sock *sk,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8be20f7b76df..4696d27a8994 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -116,6 +116,9 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
 
 static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
 {
+	if (inet_sk(sk)->inet_sport == 0)
+		return true;
+
 	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH mptcp-next v2 5/5] mptcp: remove per-address listening sockets
  2022-02-17 14:25 [PATCH mptcp-next v2 0/5] mptcp: replace per-addr listener sockets Florian Westphal
                   ` (3 preceding siblings ...)
  2022-02-17 14:25 ` [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket Florian Westphal
@ 2022-02-17 14:25 ` Florian Westphal
  4 siblings, 0 replies; 16+ messages in thread
From: Florian Westphal @ 2022-02-17 14:25 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Not required anymore, syn packets with a join requests are redirected
to pernet mptcp pseudo-listening socket.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/pm_netlink.c | 65 ------------------------------------------
 1 file changed, 65 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 614b5d05aa62..8a26c7177054 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry {
 	struct mptcp_addr_info	addr;
 	u8			flags;
 	int			ifindex;
-	struct socket		*lsk;
 };
 
 struct mptcp_pm_add_entry {
@@ -954,57 +953,6 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
 	return ret;
 }
 
-static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
-					    struct mptcp_pm_addr_entry *entry)
-{
-	int addrlen = sizeof(struct sockaddr_in);
-	struct sockaddr_storage addr;
-	struct mptcp_sock *msk;
-	struct socket *ssock;
-	int backlog = 1024;
-	int err;
-
-	err = sock_create_kern(sock_net(sk), entry->addr.family,
-			       SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk);
-	if (err)
-		return err;
-
-	msk = mptcp_sk(entry->lsk->sk);
-	if (!msk) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	ssock = __mptcp_nmpc_socket(msk);
-	if (!ssock) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-	if (entry->addr.family == AF_INET6)
-		addrlen = sizeof(struct sockaddr_in6);
-#endif
-	err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
-	if (err) {
-		pr_warn("kernel_bind error, err=%d", err);
-		goto out;
-	}
-
-	err = kernel_listen(ssock, backlog);
-	if (err) {
-		pr_warn("kernel_listen error, err=%d", err);
-		goto out;
-	}
-
-	return 0;
-
-out:
-	sock_release(entry->lsk);
-	return err;
-}
-
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 {
 	struct mptcp_pm_addr_entry *entry;
@@ -1050,7 +998,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 	entry->addr.port = 0;
 	entry->ifindex = 0;
 	entry->flags = 0;
-	entry->lsk = NULL;
 	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
 	if (ret < 0)
 		kfree(entry);
@@ -1258,19 +1205,9 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	*entry = addr;
-	if (entry->addr.port) {
-		ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
-		if (ret) {
-			GENL_SET_ERR_MSG(info, "create listen socket error");
-			kfree(entry);
-			return ret;
-		}
-	}
 	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
 	if (ret < 0) {
 		GENL_SET_ERR_MSG(info, "too many addresses or duplicate one");
-		if (entry->lsk)
-			sock_release(entry->lsk);
 		kfree(entry);
 		return ret;
 	}
@@ -1375,8 +1312,6 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
 /* caller must ensure the RCU grace period is already elapsed */
 static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
 {
-	if (entry->lsk)
-		sock_release(entry->lsk);
 	kfree(entry);
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket
  2022-02-17 14:25 ` [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket Florian Westphal
@ 2022-02-17 19:46     ` kernel test robot
  2022-02-17 19:47     ` kernel test robot
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2022-02-17 19:46 UTC (permalink / raw)
  To: Florian Westphal, mptcp; +Cc: kbuild-all, Florian Westphal

Hi Florian,

I love your patch! Yet something to improve:

[auto build test ERROR on mptcp/export]
[cannot apply to linus/master v5.17-rc4 next-20220217]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Florian-Westphal/mptcp-replace-per-addr-listener-sockets/20220217-222844
base:   https://github.com/multipath-tcp/mptcp_net-next.git export
config: um-i386_defconfig (https://download.01.org/0day-ci/archive/20220218/202202180141.4Rse3JCH-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/044982520ba41e284eebe48421fad7feb55f2106
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Florian-Westphal/mptcp-replace-per-addr-listener-sockets/20220217-222844
        git checkout 044982520ba41e284eebe48421fad7feb55f2106
        # save the config file to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   net/mptcp/ctrl.c: In function '__mptcp_handle_join':
>> net/mptcp/ctrl.c:251:17: error: 'struct mptcp_pernet' has no member named 'join6'; did you mean 'join4'?
     251 |   lsk = pernet->join6.sk;
         |                 ^~~~~
         |                 join4


vim +251 net/mptcp/ctrl.c

   205	
   206	struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
   207	{
   208		struct mptcp_options_received mp_opt;
   209		struct mptcp_pernet *pernet;
   210		struct socket *ssock;
   211		struct sock *lsk;
   212		struct net *net;
   213	
   214		/* paranoia check: don't allow 0 destination port,
   215		 * else __inet_inherit_port will insert the child socket
   216		 * into the phony hash slot of the pernet listener.
   217		 */
   218		if (tcp_hdr(skb)->dest == 0)
   219			return NULL;
   220	
   221		mptcp_get_options(skb, &mp_opt);
   222	
   223		if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
   224			return NULL;
   225	
   226		net = dev_net(skb_dst(skb)->dev);
   227		if (!mptcp_is_enabled(net))
   228			return NULL;
   229	
   230		/* RFC8684: If the token is unknown [..], the receiver will send
   231		 * back a reset (RST) signal, analogous to an unknown port in TCP,
   232		 * containing an MP_TCPRST option (Section 3.6) [..]
   233		 */
   234		if (!mptcp_token_exists(net, mp_opt.token)) {
   235			struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
   236	
   237			if (ext) {
   238				memset(ext, 0, sizeof(*ext));
   239				ext->reset_reason = MPTCP_RST_EMPTCP;
   240			}
   241			return NULL;
   242		}
   243	
   244		pernet = mptcp_get_pernet(net);
   245	
   246		switch (af) {
   247		case AF_INET:
   248			lsk = pernet->join4.sk;
   249			break;
   250		case AF_INET6:
 > 251			lsk = pernet->join6.sk;
   252			break;
   253		default:
   254			WARN_ON_ONCE(1);
   255			return NULL;
   256		}
   257	
   258		ssock = __mptcp_nmpc_socket(mptcp_sk(lsk));
   259		if (WARN_ON(!ssock))
   260			return NULL;
   261	
   262		return ssock->sk;
   263	}
   264	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket
@ 2022-02-17 19:46     ` kernel test robot
  0 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2022-02-17 19:46 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3583 bytes --]

Hi Florian,

I love your patch! Yet something to improve:

[auto build test ERROR on mptcp/export]
[cannot apply to linus/master v5.17-rc4 next-20220217]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Florian-Westphal/mptcp-replace-per-addr-listener-sockets/20220217-222844
base:   https://github.com/multipath-tcp/mptcp_net-next.git export
config: um-i386_defconfig (https://download.01.org/0day-ci/archive/20220218/202202180141.4Rse3JCH-lkp(a)intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/044982520ba41e284eebe48421fad7feb55f2106
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Florian-Westphal/mptcp-replace-per-addr-listener-sockets/20220217-222844
        git checkout 044982520ba41e284eebe48421fad7feb55f2106
        # save the config file to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   net/mptcp/ctrl.c: In function '__mptcp_handle_join':
>> net/mptcp/ctrl.c:251:17: error: 'struct mptcp_pernet' has no member named 'join6'; did you mean 'join4'?
     251 |   lsk = pernet->join6.sk;
         |                 ^~~~~
         |                 join4


vim +251 net/mptcp/ctrl.c

   205	
   206	struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
   207	{
   208		struct mptcp_options_received mp_opt;
   209		struct mptcp_pernet *pernet;
   210		struct socket *ssock;
   211		struct sock *lsk;
   212		struct net *net;
   213	
   214		/* paranoia check: don't allow 0 destination port,
   215		 * else __inet_inherit_port will insert the child socket
   216		 * into the phony hash slot of the pernet listener.
   217		 */
   218		if (tcp_hdr(skb)->dest == 0)
   219			return NULL;
   220	
   221		mptcp_get_options(skb, &mp_opt);
   222	
   223		if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
   224			return NULL;
   225	
   226		net = dev_net(skb_dst(skb)->dev);
   227		if (!mptcp_is_enabled(net))
   228			return NULL;
   229	
   230		/* RFC8684: If the token is unknown [..], the receiver will send
   231		 * back a reset (RST) signal, analogous to an unknown port in TCP,
   232		 * containing an MP_TCPRST option (Section 3.6) [..]
   233		 */
   234		if (!mptcp_token_exists(net, mp_opt.token)) {
   235			struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
   236	
   237			if (ext) {
   238				memset(ext, 0, sizeof(*ext));
   239				ext->reset_reason = MPTCP_RST_EMPTCP;
   240			}
   241			return NULL;
   242		}
   243	
   244		pernet = mptcp_get_pernet(net);
   245	
   246		switch (af) {
   247		case AF_INET:
   248			lsk = pernet->join4.sk;
   249			break;
   250		case AF_INET6:
 > 251			lsk = pernet->join6.sk;
   252			break;
   253		default:
   254			WARN_ON_ONCE(1);
   255			return NULL;
   256		}
   257	
   258		ssock = __mptcp_nmpc_socket(mptcp_sk(lsk));
   259		if (WARN_ON(!ssock))
   260			return NULL;
   261	
   262		return ssock->sk;
   263	}
   264	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket
  2022-02-17 14:25 ` [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket Florian Westphal
@ 2022-02-17 19:47     ` kernel test robot
  2022-02-17 19:47     ` kernel test robot
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2022-02-17 19:47 UTC (permalink / raw)
  To: Florian Westphal, mptcp; +Cc: llvm, kbuild-all, Florian Westphal

Hi Florian,

I love your patch! Yet something to improve:

[auto build test ERROR on mptcp/export]
[cannot apply to linus/master v5.17-rc4 next-20220217]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Florian-Westphal/mptcp-replace-per-addr-listener-sockets/20220217-222844
base:   https://github.com/multipath-tcp/mptcp_net-next.git export
config: mips-buildonly-randconfig-r002-20220217 (https://download.01.org/0day-ci/archive/20220218/202202180141.VfybVriM-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d271fc04d5b97b12e6b797c6067d3c96a8d7470e)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install mips cross compiling tool for clang build
        # apt-get install binutils-mips-linux-gnu
        # https://github.com/0day-ci/linux/commit/044982520ba41e284eebe48421fad7feb55f2106
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Florian-Westphal/mptcp-replace-per-addr-listener-sockets/20220217-222844
        git checkout 044982520ba41e284eebe48421fad7feb55f2106
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash net/mptcp/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> net/mptcp/ctrl.c:251:17: error: no member named 'join6' in 'struct mptcp_pernet'; did you mean 'join4'?
                   lsk = pernet->join6.sk;
                                 ^~~~~
                                 join4
   net/mptcp/ctrl.c:49:23: note: 'join4' declared here
           struct mptcp_join_sk join4;
                                ^
   1 error generated.


vim +251 net/mptcp/ctrl.c

   205	
   206	struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
   207	{
   208		struct mptcp_options_received mp_opt;
   209		struct mptcp_pernet *pernet;
   210		struct socket *ssock;
   211		struct sock *lsk;
   212		struct net *net;
   213	
   214		/* paranoia check: don't allow 0 destination port,
   215		 * else __inet_inherit_port will insert the child socket
   216		 * into the phony hash slot of the pernet listener.
   217		 */
   218		if (tcp_hdr(skb)->dest == 0)
   219			return NULL;
   220	
   221		mptcp_get_options(skb, &mp_opt);
   222	
   223		if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
   224			return NULL;
   225	
   226		net = dev_net(skb_dst(skb)->dev);
   227		if (!mptcp_is_enabled(net))
   228			return NULL;
   229	
   230		/* RFC8684: If the token is unknown [..], the receiver will send
   231		 * back a reset (RST) signal, analogous to an unknown port in TCP,
   232		 * containing an MP_TCPRST option (Section 3.6) [..]
   233		 */
   234		if (!mptcp_token_exists(net, mp_opt.token)) {
   235			struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
   236	
   237			if (ext) {
   238				memset(ext, 0, sizeof(*ext));
   239				ext->reset_reason = MPTCP_RST_EMPTCP;
   240			}
   241			return NULL;
   242		}
   243	
   244		pernet = mptcp_get_pernet(net);
   245	
   246		switch (af) {
   247		case AF_INET:
   248			lsk = pernet->join4.sk;
   249			break;
   250		case AF_INET6:
 > 251			lsk = pernet->join6.sk;
   252			break;
   253		default:
   254			WARN_ON_ONCE(1);
   255			return NULL;
   256		}
   257	
   258		ssock = __mptcp_nmpc_socket(mptcp_sk(lsk));
   259		if (WARN_ON(!ssock))
   260			return NULL;
   261	
   262		return ssock->sk;
   263	}
   264	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket
@ 2022-02-17 19:47     ` kernel test robot
  0 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2022-02-17 19:47 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4098 bytes --]

Hi Florian,

I love your patch! Yet something to improve:

[auto build test ERROR on mptcp/export]
[cannot apply to linus/master v5.17-rc4 next-20220217]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Florian-Westphal/mptcp-replace-per-addr-listener-sockets/20220217-222844
base:   https://github.com/multipath-tcp/mptcp_net-next.git export
config: mips-buildonly-randconfig-r002-20220217 (https://download.01.org/0day-ci/archive/20220218/202202180141.VfybVriM-lkp(a)intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d271fc04d5b97b12e6b797c6067d3c96a8d7470e)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install mips cross compiling tool for clang build
        # apt-get install binutils-mips-linux-gnu
        # https://github.com/0day-ci/linux/commit/044982520ba41e284eebe48421fad7feb55f2106
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Florian-Westphal/mptcp-replace-per-addr-listener-sockets/20220217-222844
        git checkout 044982520ba41e284eebe48421fad7feb55f2106
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash net/mptcp/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> net/mptcp/ctrl.c:251:17: error: no member named 'join6' in 'struct mptcp_pernet'; did you mean 'join4'?
                   lsk = pernet->join6.sk;
                                 ^~~~~
                                 join4
   net/mptcp/ctrl.c:49:23: note: 'join4' declared here
           struct mptcp_join_sk join4;
                                ^
   1 error generated.


vim +251 net/mptcp/ctrl.c

   205	
   206	struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
   207	{
   208		struct mptcp_options_received mp_opt;
   209		struct mptcp_pernet *pernet;
   210		struct socket *ssock;
   211		struct sock *lsk;
   212		struct net *net;
   213	
   214		/* paranoia check: don't allow 0 destination port,
   215		 * else __inet_inherit_port will insert the child socket
   216		 * into the phony hash slot of the pernet listener.
   217		 */
   218		if (tcp_hdr(skb)->dest == 0)
   219			return NULL;
   220	
   221		mptcp_get_options(skb, &mp_opt);
   222	
   223		if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
   224			return NULL;
   225	
   226		net = dev_net(skb_dst(skb)->dev);
   227		if (!mptcp_is_enabled(net))
   228			return NULL;
   229	
   230		/* RFC8684: If the token is unknown [..], the receiver will send
   231		 * back a reset (RST) signal, analogous to an unknown port in TCP,
   232		 * containing an MP_TCPRST option (Section 3.6) [..]
   233		 */
   234		if (!mptcp_token_exists(net, mp_opt.token)) {
   235			struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
   236	
   237			if (ext) {
   238				memset(ext, 0, sizeof(*ext));
   239				ext->reset_reason = MPTCP_RST_EMPTCP;
   240			}
   241			return NULL;
   242		}
   243	
   244		pernet = mptcp_get_pernet(net);
   245	
   246		switch (af) {
   247		case AF_INET:
   248			lsk = pernet->join4.sk;
   249			break;
   250		case AF_INET6:
 > 251			lsk = pernet->join6.sk;
   252			break;
   253		default:
   254			WARN_ON_ONCE(1);
   255			return NULL;
   256		}
   257	
   258		ssock = __mptcp_nmpc_socket(mptcp_sk(lsk));
   259		if (WARN_ON(!ssock))
   260			return NULL;
   261	
   262		return ssock->sk;
   263	}
   264	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 3/5] tcp: add mptcp join demultiplex hooks
  2022-02-17 14:25 ` [PATCH mptcp-next v2 3/5] tcp: add mptcp join demultiplex hooks Florian Westphal
@ 2022-02-17 21:58   ` Paolo Abeni
  2022-02-18  7:29     ` Florian Westphal
  0 siblings, 1 reply; 16+ messages in thread
From: Paolo Abeni @ 2022-02-17 21:58 UTC (permalink / raw)
  To: Florian Westphal, mptcp

On Thu, 2022-02-17 at 15:25 +0100, Florian Westphal wrote:
> Split from the next patch to make core tcp changes more obvious:
> add a dummy function that gets called after tcp socket demux came up
> empty.
> 
> This will be used by mptcp to check if a tcp syn contains an mptcp
> join option with a valid token (connection id).
> 
> If so, a hidden pernet mptcp listener socket is returned and packet
> resumes normally.
> 
> This patch series does not cover timewait sockets so far.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>

Following-up todays mtg discussion WRT filtering incoming MPJ on
existing endpoint. 

If I read correctly, subflow_syn_recv_sock() will filter the incoming
packets vs the annonced list, but only if the incoming packet's
destination port is different from the listener port.

- incoming request targeting a bad address and/or port will reach the
syn-ack before being rejected.

- incoming request targeting port 0 will likely be always accepted.

What about moving the anno_list check in __mptcp_handle_join()? Should
address the 2 above points and keep the anno_list traversals to the
bare minimum.

WDYT?

Cheers,

Paolo


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket
  2022-02-17 14:25 ` [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket Florian Westphal
  2022-02-17 19:46     ` kernel test robot
  2022-02-17 19:47     ` kernel test robot
@ 2022-02-17 23:52   ` Mat Martineau
  2022-02-20 22:01     ` Florian Westphal
  2022-02-23 18:07   ` Kishen Maloor
  3 siblings, 1 reply; 16+ messages in thread
From: Mat Martineau @ 2022-02-17 23:52 UTC (permalink / raw)
  To: Florian Westphal; +Cc: mptcp

On Thu, 17 Feb 2022, Florian Westphal wrote:

> Currently mptcp adds kernel-based listener socket for all
> netlink-configured mptcp address endpoints.
>
> This has caveats because kernel may interfere with unrelated programs
> that use same address/port pairs.
>
> RFC 8684 says:
> Demultiplexing subflow SYNs MUST be done using the token; this is
> unlike traditional TCP, where the destination port is used for
> demultiplexing SYN packets.  Once a subflow is set up, demultiplexing
> packets is done using the 5-tuple, as in traditional TCP.
>
> This patch deviates from this in that it retrains the existing checks of

Typo: retains

> verifying the incoming requests destination vs. the list of announced
> addresses.
>
> This can be relaxed later if deemed appropriate.
>
> Furthermore, TCP-only listeners take precedence: An MPTCP peer
> MUST NOT announce address:port pairs that are already in use by a
> non-mptcp listener.
> This could be changed, but it requires move of mptcp_handle_join()
> hook *before* the tcp port demux, i.e. an additional conditional in
> hotpath.  As-is, the additional conditional (syn && !rst && ...) is
> in the 'no socket found' path.
>
> The pernet 'listening' socket is hidden from userspace.
> It is not part of any hashes and not bound to any address or port.
>
> TPROXY-like semantics apply: If tcp demux cannot find a port for a given
> packet, check if the packet is a syn packet with a valid join token.
>
> If so, the pernet listener is returned and tcp processing resumes.
> Otherwise, handling is intentical.

Typo: identical

>
> This patch does not cover timewait sockets.
>

Can you elaborate on what properly covering timewait sockets would look 
like? Does the approach used by multipath-tcp.org (handling token lookup 
after inet_lookup_listener() / case TCP_TW_SYN in tcp_v4_rcv()) fit?


I have one more comment below, but other than that the code that's here is 
looking good and the big outstanding questions are the things you noted as 
not-yet-changed in the cover letter (MP_JOIN token lookup for established 
or TCP listening sockets). We did discuss that in today's meeting so I 
won't rehash here.


> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
> include/net/mptcp.h  |  10 ++
> net/ipv6/tcp_ipv6.c  |  19 ++--
> net/mptcp/ctrl.c     | 211 ++++++++++++++++++++++++++++++++++++++++++-
> net/mptcp/protocol.c |   2 +-
> net/mptcp/protocol.h |   2 +-
> net/mptcp/subflow.c  |   3 +
> 6 files changed, 233 insertions(+), 14 deletions(-)
>

...

> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
> index ae20b7d92e28..6358c803ba12 100644
> --- a/net/mptcp/ctrl.c
> +++ b/net/mptcp/ctrl.c

...

> @@ -185,13 +203,173 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
>
> #endif /* CONFIG_SYSCTL */
>
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
> +{
> +	struct mptcp_options_received mp_opt;
> +	struct mptcp_pernet *pernet;
> +	struct socket *ssock;
> +	struct sock *lsk;
> +	struct net *net;
> +
> +	/* paranoia check: don't allow 0 destination port,
> +	 * else __inet_inherit_port will insert the child socket
> +	 * into the phony hash slot of the pernet listener.
> +	 */
> +	if (tcp_hdr(skb)->dest == 0)
> +		return NULL;
> +
> +	mptcp_get_options(skb, &mp_opt);
> +
> +	if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
> +		return NULL;
> +
> +	net = dev_net(skb_dst(skb)->dev);
> +	if (!mptcp_is_enabled(net))
> +		return NULL;
> +
> +	/* RFC8684: If the token is unknown [..], the receiver will send
> +	 * back a reset (RST) signal, analogous to an unknown port in TCP,
> +	 * containing an MP_TCPRST option (Section 3.6) [..]
> +	 */
> +	if (!mptcp_token_exists(net, mp_opt.token)) {
> +		struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
> +
> +		if (ext) {
> +			memset(ext, 0, sizeof(*ext));
> +			ext->reset_reason = MPTCP_RST_EMPTCP;
> +		}
> +		return NULL;
> +	}
> +
> +	pernet = mptcp_get_pernet(net);
> +
> +	switch (af) {
> +	case AF_INET:
> +		lsk = pernet->join4.sk;
> +		break;
> +	case AF_INET6:
> +		lsk = pernet->join6.sk;
> +		break;

As kbuild noted, missing an

#if IS_ENABLED(CONFIG_MPTCP_IPV6)

check here.


--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 3/5] tcp: add mptcp join demultiplex hooks
  2022-02-17 21:58   ` Paolo Abeni
@ 2022-02-18  7:29     ` Florian Westphal
  2022-02-18  8:49       ` Paolo Abeni
  0 siblings, 1 reply; 16+ messages in thread
From: Florian Westphal @ 2022-02-18  7:29 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: Florian Westphal, mptcp

Paolo Abeni <pabeni@redhat.com> wrote:
> If I read correctly, subflow_syn_recv_sock() will filter the incoming
> packets vs the annonced list, but only if the incoming packet's
> destination port is different from the listener port.

Same as today, if it hit listener port then no issue...?
The listener port isn't in the announced list, so the check would fail.

> - incoming request targeting a bad address and/or port will reach the
> syn-ack before being rejected.

Yes.

> - incoming request targeting port 0 will likely be always accepted.

No, port 0 is illegal, there is an explicit check that prevents
such port from being wired up to the magic listener.

> What about moving the anno_list check in __mptcp_handle_join()? Should
> address the 2 above points and keep the anno_list traversals to the
> bare minimum.

I can have a look.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 3/5] tcp: add mptcp join demultiplex hooks
  2022-02-18  7:29     ` Florian Westphal
@ 2022-02-18  8:49       ` Paolo Abeni
  0 siblings, 0 replies; 16+ messages in thread
From: Paolo Abeni @ 2022-02-18  8:49 UTC (permalink / raw)
  To: Florian Westphal, Mat Martineau; +Cc: mptcp

On Fri, 2022-02-18 at 08:29 +0100, Florian Westphal wrote:
> Paolo Abeni <pabeni@redhat.com> wrote:
> > If I read correctly, subflow_syn_recv_sock() will filter the incoming
> > packets vs the annonced list, but only if the incoming packet's
> > destination port is different from the listener port.
> 
> Same as today, if it hit listener port then no issue...?
> The listener port isn't in the announced list, so the check would fail.

Side not not related to this series: It looks like that if we have 2
MPTCP listeners on different addresses and/or ports, and no 'signal'
endpoints, the kernel will accept MPJ on either sockets for any mptcp
connection, regardless of the originating listener. I'm unsure if that
is expeted/fits the RFC fully. 
@Mat: ^^^ WDYT?

> 
> > - incoming request targeting a bad address and/or port will reach the
> > syn-ack before being rejected.
> 
> Yes.
> 
> > - incoming request targeting port 0 will likely be always accepted.
> 
> No, port 0 is illegal, there is an explicit check that prevents
> such port from being wired up to the magic listener.

whoops, I missed that check, despite the huge comment. Sorry for the
noise here.

> > What about moving the anno_list check in __mptcp_handle_join()? Should
> > address the 2 above points and keep the anno_list traversals to the
> > bare minimum.
> 
> I can have a look.

Thanks! It looks like some minor changes are needed to handle !ipv6
build.

Cheers,
Paolo


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket
  2022-02-17 23:52   ` Mat Martineau
@ 2022-02-20 22:01     ` Florian Westphal
  0 siblings, 0 replies; 16+ messages in thread
From: Florian Westphal @ 2022-02-20 22:01 UTC (permalink / raw)
  To: Mat Martineau; +Cc: Florian Westphal, mptcp

Mat Martineau <mathew.j.martineau@linux.intel.com> wrote:
> > If so, the pernet listener is returned and tcp processing resumes.
> > Otherwise, handling is intentical.
> 
> Typo: identical

Thanks, fixed both typos.

> > This patch does not cover timewait sockets.
> > 
> Can you elaborate on what properly covering timewait sockets would look
> like? Does the approach used by multipath-tcp.org (handling token lookup
> after inet_lookup_listener() / case TCP_TW_SYN in tcp_v4_rcv()) fit?

Looks like its as simple as:

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2189,6 +2189,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
 							iph->daddr, th->dest,
 							inet_iif(skb),
 							sdif);
+		if (!sk2)
+			sk2 = mptcp_handle_join(AF_INET, skb);
+
 		if (sk2) {
 			inet_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3b8608d35dcd..f2f2308c6bda 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1833,6 +1833,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 					    ntohs(th->dest),
 					    tcp_v6_iif_l3_slave(skb),
 					    sdif);
+		if (!sk2)
+			sk2 = mptcp_handle_join(AF_INET6, skb);
+
 		if (sk2) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
 			inet_twsk_deschedule_put(tw);

I think I'll squash this into the previous patch and will just remove
the timewait-socket bit from this patch.

> > +	case AF_INET6:
> > +		lsk = pernet->join6.sk;
> > +		break;
> 
> As kbuild noted, missing an
> 
> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> 
> check here.

Added.

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket
  2022-02-17 14:25 ` [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket Florian Westphal
                     ` (2 preceding siblings ...)
  2022-02-17 23:52   ` Mat Martineau
@ 2022-02-23 18:07   ` Kishen Maloor
  3 siblings, 0 replies; 16+ messages in thread
From: Kishen Maloor @ 2022-02-23 18:07 UTC (permalink / raw)
  To: Florian Westphal, mptcp

On 2/17/22 6:25 AM, Florian Westphal wrote:
> Currently mptcp adds kernel-based listener socket for all
> netlink-configured mptcp address endpoints.
> 
> This has caveats because kernel may interfere with unrelated programs
> that use same address/port pairs.
> 

I assume that this refers to the potential race between a kernel listener and
the application which Paolo had raised. I'm not sure if this change eliminates that 
possibility. Pasting Paolo's example below from the prior discussion:

"""
s1 = socket()
bind(s1, A)
listen(s1)
// at this point incoming MPTCP connection can be established on s1
// and ADD_ADDR sub-options could be sent back

s2 = socket()
bind(s2, B)
listen(s2)
"""

Over a new MPTCP connection following listen(s1), the PM can issue an ADD_ADDR with B. In light
of this change there would be no listener created for B. But if the remote endpoint immediately
established a subflow in response, then that would result in a subflow (connection) socket at B. 
It appears (and correct me if I'm wrong) that bind(s2, B) might fail at this point (?).
In other words, it seems like the subflow creation could race with a subsequent bind()
causing startup issues in the application.

If my assessment is correct, then I think this change isn't sidestepping the issue  
that motivated it in the first place. However, I do like the fact that we don't need
code to manage kernel listeners, just so long as there are no other breaking corner cases
and the TCP level changes are acceptable upstream.

> RFC 8684 says:
>  Demultiplexing subflow SYNs MUST be done using the token; this is
>  unlike traditional TCP, where the destination port is used for
>  demultiplexing SYN packets.  Once a subflow is set up, demultiplexing
>  packets is done using the 5-tuple, as in traditional TCP.
> 
> This patch deviates from this in that it retrains the existing checks of
> verifying the incoming requests destination vs. the list of announced
> addresses.
> 
> This can be relaxed later if deemed appropriate.
> 
> Furthermore, TCP-only listeners take precedence: An MPTCP peer
> MUST NOT announce address:port pairs that are already in use by a
> non-mptcp listener.
> This could be changed, but it requires move of mptcp_handle_join()
> hook *before* the tcp port demux, i.e. an additional conditional in
> hotpath.  As-is, the additional conditional (syn && !rst && ...) is
> in the 'no socket found' path.
> 
> The pernet 'listening' socket is hidden from userspace.
> It is not part of any hashes and not bound to any address or port.
> 
> TPROXY-like semantics apply: If tcp demux cannot find a port for a given
> packet, check if the packet is a syn packet with a valid join token.
> 
> If so, the pernet listener is returned and tcp processing resumes.
> Otherwise, handling is intentical.
> 
> This patch does not cover timewait sockets.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  include/net/mptcp.h  |  10 ++
>  net/ipv6/tcp_ipv6.c  |  19 ++--
>  net/mptcp/ctrl.c     | 211 ++++++++++++++++++++++++++++++++++++++++++-
>  net/mptcp/protocol.c |   2 +-
>  net/mptcp/protocol.h |   2 +-
>  net/mptcp/subflow.c  |   3 +
>  6 files changed, 233 insertions(+), 14 deletions(-)
> 
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index 5ee422b56902..49c188b978e1 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -189,6 +189,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
>  				  struct sk_buff *skb);
>  
>  __be32 mptcp_get_reset_option(const struct sk_buff *skb);
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb);
>  
>  static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
>  {
> @@ -199,6 +200,11 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
>  }
>  static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb)
>  {
> +	const struct tcphdr *th = tcp_hdr(skb);
> +
> +	if (th->syn && !th->ack && !th->rst && !th->fin)
> +		return __mptcp_handle_join(af, skb);
> +
>  	return NULL;
>  }
>  #else
> @@ -283,9 +289,13 @@ static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb) { retu
>  
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>  int mptcpv6_init(void);
> +int mptcpv6_init_net(struct net *net);
> +void mptcpv6_exit_net(struct net *net);
>  void mptcpv6_handle_mapped(struct sock *sk, bool mapped);
>  #elif IS_ENABLED(CONFIG_IPV6)
>  static inline int mptcpv6_init(void) { return 0; }
> +static inline int mptcpv6_init_net(struct net *net) { return 0; }
> +static inline void mptcpv6_exit_net(struct net *net) { }
>  static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
>  #endif
>  
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 788040db8e9e..3b8608d35dcd 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -2233,13 +2233,22 @@ static struct inet_protosw tcpv6_protosw = {
>  
>  static int __net_init tcpv6_net_init(struct net *net)
>  {
> -	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
> -				    SOCK_RAW, IPPROTO_TCP, net);
> +	int err = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
> +				       SOCK_RAW, IPPROTO_TCP, net);
> +	if (err)
> +		return err;
> +
> +	err = mptcpv6_init_net(net);
> +	if (err)
> +		inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> +
> +	return err;
>  }
>  
>  static void __net_exit tcpv6_net_exit(struct net *net)
>  {
>  	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> +	mptcpv6_exit_net(net);
>  }
>  
>  static struct pernet_operations tcpv6_net_ops = {
> @@ -2264,15 +2273,9 @@ int __init tcpv6_init(void)
>  	if (ret)
>  		goto out_tcpv6_protosw;
>  
> -	ret = mptcpv6_init();
> -	if (ret)
> -		goto out_tcpv6_pernet_subsys;
> -
>  out:
>  	return ret;
>  
> -out_tcpv6_pernet_subsys:
> -	unregister_pernet_subsys(&tcpv6_net_ops);
>  out_tcpv6_protosw:
>  	inet6_unregister_protosw(&tcpv6_protosw);
>  out_tcpv6_protocol:
> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
> index ae20b7d92e28..6358c803ba12 100644
> --- a/net/mptcp/ctrl.c
> +++ b/net/mptcp/ctrl.c
> @@ -21,6 +21,12 @@ static int mptcp_pernet_id;
>  static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
>  #endif
>  
> +struct mptcp_join_sk {
> +	struct sock *sk;
> +	struct inet_bind_bucket *tb;
> +	struct inet_bind_hashbucket head;
> +};
> +
>  struct mptcp_pernet {
>  #ifdef CONFIG_SYSCTL
>  	struct ctl_table_header *ctl_table_hdr;
> @@ -32,6 +38,18 @@ struct mptcp_pernet {
>  	u8 checksum_enabled;
>  	u8 allow_join_initial_addr_port;
>  	u8 pm_type;
> +
> +	/* pernet listener to handle mptcp join requests
> +	 * based on the mptcp token.
> +	 *
> +	 * Has to be pernet because tcp uses
> +	 * sock_net(sk_listener) to obtain the net namespace for
> +	 * the syn/ack route lookup.
> +	 */
> +	struct mptcp_join_sk join4;
> +#if IS_ENABLED(CONFIG_MPTCP_IPV6)
> +	struct mptcp_join_sk join6;
> +#endif
>  };
>  
>  static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
> @@ -185,13 +203,173 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
>  
>  #endif /* CONFIG_SYSCTL */
>  
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
> +{
> +	struct mptcp_options_received mp_opt;
> +	struct mptcp_pernet *pernet;
> +	struct socket *ssock;
> +	struct sock *lsk;
> +	struct net *net;
> +
> +	/* paranoia check: don't allow 0 destination port,
> +	 * else __inet_inherit_port will insert the child socket
> +	 * into the phony hash slot of the pernet listener.
> +	 */
> +	if (tcp_hdr(skb)->dest == 0)
> +		return NULL;
> +
> +	mptcp_get_options(skb, &mp_opt);
> +
> +	if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
> +		return NULL;
> +
> +	net = dev_net(skb_dst(skb)->dev);
> +	if (!mptcp_is_enabled(net))
> +		return NULL;
> +
> +	/* RFC8684: If the token is unknown [..], the receiver will send
> +	 * back a reset (RST) signal, analogous to an unknown port in TCP,
> +	 * containing an MP_TCPRST option (Section 3.6) [..]
> +	 */
> +	if (!mptcp_token_exists(net, mp_opt.token)) {
> +		struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
> +
> +		if (ext) {
> +			memset(ext, 0, sizeof(*ext));
> +			ext->reset_reason = MPTCP_RST_EMPTCP;
> +		}
> +		return NULL;
> +	}
> +
> +	pernet = mptcp_get_pernet(net);
> +
> +	switch (af) {
> +	case AF_INET:
> +		lsk = pernet->join4.sk;
> +		break;
> +	case AF_INET6:
> +		lsk = pernet->join6.sk;
> +		break;
> +	default:
> +		WARN_ON_ONCE(1);
> +		return NULL;
> +	}
> +
> +	ssock = __mptcp_nmpc_socket(mptcp_sk(lsk));
> +	if (WARN_ON(!ssock))
> +		return NULL;
> +
> +	return ssock->sk;
> +}
> +
> +static struct socket *mptcp_create_join_listen_socket(struct net *net, int af)
> +{
> +	struct socket *s, *ssock;
> +	int err;
> +
> +	err = sock_create_kern(net, af, SOCK_STREAM, IPPROTO_MPTCP, &s);
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	ssock = __mptcp_nmpc_socket(mptcp_sk(s->sk));
> +	if (!ssock) {
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	ssock->sk->sk_max_ack_backlog = SOMAXCONN;
> +	inet_sk_state_store(ssock->sk, TCP_LISTEN);
> +
> +	s->sk->sk_max_ack_backlog = SOMAXCONN;
> +	inet_sk_state_store(s->sk, TCP_LISTEN);
> +
> +	s->sk->sk_net_refcnt = 1;
> +	get_net_track(net, &s->sk->ns_tracker, GFP_KERNEL);
> +	sock_inuse_add(net, 1);
> +
> +	return s;
> +out:
> +	sock_release(s);
> +	return ERR_PTR(err);
> +}
> +
> +static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_join_sk *join_sk)
> +{
> +	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(sk));
> +	struct inet_hashinfo *table = ssock->sk->sk_prot->h.hashinfo;
> +	struct inet_bind_bucket *tb;
> +
> +	spin_lock_init(&join_sk->head.lock);
> +	INIT_HLIST_HEAD(&join_sk->head.chain);
> +
> +	/* Our "listen socket" isn't bound to any address or port.
> +	 * Conceptually, SYN packet with mptcp join request are steered to
> +	 * this pernet socket just like TPROXY steals arbitrary connection
> +	 * requests to assign them to listening socket with different
> +	 * address or port.
> +	 *
> +	 * The bind_bucket is needed for sake of __inet_inherit_port(),
> +	 * so it can place the new child socket in the correct
> +	 * bind_bucket slot.
> +	 *
> +	 * A phony head is used to hide this socket from normal sk loookup.
> +	 */
> +	tb = inet_bind_bucket_create(table->bind_bucket_cachep,
> +				     net, &join_sk->head, 0, 0);
> +	if (!tb)
> +		return -ENOMEM;
> +
> +	inet_csk(ssock->sk)->icsk_bind_hash = tb;
> +	return 0;
> +}
> +
>  static int __net_init mptcp_net_init(struct net *net)
>  {
>  	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +	struct socket *sock;
> +	int err;
>  
>  	mptcp_pernet_set_defaults(pernet);
>  
> -	return mptcp_pernet_new_table(net, pernet);
> +	err = mptcp_pernet_new_table(net, pernet);
> +	if (err)
> +		return err;
> +
> +	sock = mptcp_create_join_listen_socket(net, AF_INET);
> +	if (IS_ERR(sock)) {
> +		err = PTR_ERR(sock);
> +		goto out_table;
> +	}
> +
> +	err = mptcp_init_join_sk(net, sock->sk, &pernet->join4);
> +	if (err) {
> +		sock_release(sock);
> +		goto out_table;
> +	}
> +
> +	/* struct sock is still reachable via sock->sk_socket backpointer */
> +	pernet->join4.sk = sock->sk;
> +	return err;
> +
> +out_table:
> +	if (!net_eq(net, &init_net))
> +		mptcp_pernet_del_table(pernet);
> +	return err;
> +}
> +
> +static void __net_exit mptcp_exit_join_sk(struct mptcp_join_sk *jsk)
> +{
> +	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(jsk->sk));
> +	struct inet_bind_bucket *tb;
> +	struct inet_hashinfo *table;
> +
> +	table = ssock->sk->sk_prot->h.hashinfo;
> +
> +	tb = inet_csk(ssock->sk)->icsk_bind_hash;
> +	inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
> +
> +	ssock = jsk->sk->sk_socket;
> +	sock_release(ssock);
>  }
>  
>  /* Note: the callback will only be called per extra netns */
> @@ -200,6 +378,7 @@ static void __net_exit mptcp_net_exit(struct net *net)
>  	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
>  
>  	mptcp_pernet_del_table(pernet);
> +	mptcp_exit_join_sk(&pernet->join4);
>  }
>  
>  static struct pernet_operations mptcp_pernet_ops = {
> @@ -219,12 +398,36 @@ void __init mptcp_init(void)
>  }
>  
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> -int __init mptcpv6_init(void)
> +int __net_init mptcpv6_init_net(struct net *net)
>  {
> +	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +	struct socket *sock;
>  	int err;
>  
> -	err = mptcp_proto_v6_init();
> +	if (net_eq(net, &init_net)) {
> +		err = mptcp_proto_v6_init();
> +		if (err)
> +			return err;
> +	}
>  
> -	return err;
> +	sock = mptcp_create_join_listen_socket(net, AF_INET6);
> +	if (IS_ERR(sock))
> +		return PTR_ERR(sock);
> +
> +	err = mptcp_init_join_sk(net, sock->sk, &pernet->join6);
> +	if (err) {
> +		sock_release(sock);
> +		return err;
> +	}
> +
> +	pernet->join6.sk = sock->sk;
> +	return 0;
> +}
> +
> +void __net_exit mptcpv6_exit_net(struct net *net)
> +{
> +	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +
> +	mptcp_exit_join_sk(&pernet->join6);
>  }
>  #endif
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 4599bde215b2..5b54e3c8efea 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -3777,7 +3777,7 @@ static struct inet_protosw mptcp_v6_protosw = {
>  	.flags		= INET_PROTOSW_ICSK,
>  };
>  
> -int __init mptcp_proto_v6_init(void)
> +int __net_init mptcp_proto_v6_init(void)
>  {
>  	int err;
>  
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 7bd064b68b51..6a81e2a21301 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -647,7 +647,7 @@ static inline bool mptcp_has_another_subflow(struct sock *ssk)
>  
>  void __init mptcp_proto_init(void);
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> -int __init mptcp_proto_v6_init(void);
> +int __net_init mptcp_proto_v6_init(void);
>  #endif
>  
>  struct sock *mptcp_sk_clone(const struct sock *sk,
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 8be20f7b76df..4696d27a8994 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -116,6 +116,9 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
>  
>  static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
>  {
> +	if (inet_sk(sk)->inet_sport == 0)
> +		return true;
> +
>  	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
>  }
>  


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-02-23 18:07 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-17 14:25 [PATCH mptcp-next v2 0/5] mptcp: replace per-addr listener sockets Florian Westphal
2022-02-17 14:25 ` [PATCH mptcp-next v2 1/5] mptcp: check netns in mptcp_token_exists Florian Westphal
2022-02-17 14:25 ` [PATCH mptcp-next v2 2/5] mptcp: prefer ip address in syn skb instead of listen sk bound address Florian Westphal
2022-02-17 14:25 ` [PATCH mptcp-next v2 3/5] tcp: add mptcp join demultiplex hooks Florian Westphal
2022-02-17 21:58   ` Paolo Abeni
2022-02-18  7:29     ` Florian Westphal
2022-02-18  8:49       ` Paolo Abeni
2022-02-17 14:25 ` [PATCH mptcp-next v2 4/5] mptcp: handle join requests via pernet listen socket Florian Westphal
2022-02-17 19:46   ` kernel test robot
2022-02-17 19:46     ` kernel test robot
2022-02-17 19:47   ` kernel test robot
2022-02-17 19:47     ` kernel test robot
2022-02-17 23:52   ` Mat Martineau
2022-02-20 22:01     ` Florian Westphal
2022-02-23 18:07   ` Kishen Maloor
2022-02-17 14:25 ` [PATCH mptcp-next v2 5/5] mptcp: remove per-address listening sockets Florian Westphal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.