[PATCH mptcp-next 0/4] mptcp: replace per-addr listener sockets

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH mptcp-next 0/4] mptcp: replace per-addr listener sockets
@ 2022-02-10 15:29 Florian Westphal
  2022-02-10 15:29 ` [PATCH mptcp-next 1/4] mptcp: prefer ip address in syn skb instead of listen sk bound address Florian Westphal
                   ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Florian Westphal @ 2022-02-10 15:29 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

This series introduces a hidden per-netns pseudo "listener"
socket to handle mptcp join requests with a valid token but that
do not match an existing listening socket.

First patch is a minor preparation patch: MPTCP Join requests packets
that fail to find a suitable socket by means of standard address/port
demultiplexing will be steered to a pseudo-listener, similar to TPROXY
interception.

This pseudo listener isn't bound to an address or port (all zero), so
we need to fetch the port number from the tcp header and not the
listener socket.

Patch two adds a stub to the tcp demux code.
This has no functionality, its extra to make tcp datapath change
stand out.

Third patch is the bulk work, it adds per netns listener and
implements token-based socket demultiplexing.

Last patch zaps the per-address sockets from mptcp, they are not
needed anymore.

Florian Westphal (4):
  mptcp: prefer ip address in syn skb instead of listen sk bound address
  tcp: add mptcp join demultiplex hooks
  mptcp: handle join requests via pernet listen socket
  mptcp: remove per-address listening sockets

 include/net/mptcp.h    |  15 +++
 net/ipv4/tcp_ipv4.c    |   4 +
 net/ipv6/tcp_ipv6.c    |  23 +++--
 net/mptcp/ctrl.c       | 214 ++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/pm_netlink.c |  82 +++-------------
 net/mptcp/protocol.c   |   2 +-
 net/mptcp/protocol.h   |   4 +-
 net/mptcp/subflow.c    |   8 +-
 8 files changed, 268 insertions(+), 84 deletions(-)

-- 
2.34.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH mptcp-next 1/4] mptcp: prefer ip address in syn skb instead of listen sk bound address
  2022-02-10 15:29 [PATCH mptcp-next 0/4] mptcp: replace per-addr listener sockets Florian Westphal
@ 2022-02-10 15:29 ` Florian Westphal
  2022-02-11 10:34   ` Paolo Abeni
  2022-02-10 15:29 ` [PATCH mptcp-next 2/4] tcp: add mptcp join demultiplex hooks Florian Westphal
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 24+ messages in thread
From: Florian Westphal @ 2022-02-10 15:29 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Once we change mptcp to use tproxy-like scheme to steer mptcp join
requests to a special pernet socket, the 'sk bound address' becomes
meaningless because it will never be identical to the tcp dport/ip daddr
of the on-wire packet.

Prepare for this: pass the skbuff and use the packet data instead of
the address the listener socket is bound to.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/pm_netlink.c | 17 +++++++++++++++--
 net/mptcp/protocol.h   |  2 +-
 net/mptcp/subflow.c    |  5 +++--
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 98b485406afa..1696f6fb9baa 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -279,13 +279,26 @@ mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
 	return NULL;
 }
 
-bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
+static void skb_address(const struct sk_buff *skb,
+			struct mptcp_addr_info *addr)
+{
+	addr->port = tcp_hdr(skb)->dest;
+	if (addr->family == AF_INET)
+		addr->addr.s_addr = ip_hdr(skb)->daddr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	else if (addr->family == AF_INET6)
+		addr->addr6 = ipv6_hdr(skb)->daddr;
+#endif
+}
+
+bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, int af, const struct sk_buff *skb)
 {
 	struct mptcp_pm_add_entry *entry;
 	struct mptcp_addr_info saddr;
 	bool ret = false;
 
-	local_address((struct sock_common *)sk, &saddr);
+	saddr.family = af;
+	skb_address(skb, &saddr);
 
 	spin_lock_bh(&msk->pm.lock);
 	list_for_each_entry(entry, &msk->pm.anno_list, list) {
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3937ea3f6759..03e3880d274d 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -760,7 +760,7 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
 void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
 void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
-bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
+bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, int af, const struct sk_buff *skb);
 struct mptcp_pm_add_entry *
 mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 		       struct mptcp_addr_info *addr, bool check_id);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e727d838da0e..d50cf555ea40 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -216,7 +216,8 @@ static int subflow_check_req(struct request_sock *req,
 			pr_debug("syn inet_sport=%d %d",
 				 ntohs(inet_sk(sk_listener)->inet_sport),
 				 ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
-			if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
+			if (!mptcp_pm_sport_in_anno_list(subflow_req->msk,
+							 sk_listener->sk_family, skb)) {
 				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
 				return -EPERM;
 			}
@@ -748,7 +749,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 				pr_debug("ack inet_sport=%d %d",
 					 ntohs(inet_sk(sk)->inet_sport),
 					 ntohs(inet_sk((struct sock *)owner)->inet_sport));
-				if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
+				if (!mptcp_pm_sport_in_anno_list(owner, sk->sk_family, skb)) {
 					SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
 					goto dispose_child;
 				}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH mptcp-next 2/4] tcp: add mptcp join demultiplex hooks
  2022-02-10 15:29 [PATCH mptcp-next 0/4] mptcp: replace per-addr listener sockets Florian Westphal
  2022-02-10 15:29 ` [PATCH mptcp-next 1/4] mptcp: prefer ip address in syn skb instead of listen sk bound address Florian Westphal
@ 2022-02-10 15:29 ` Florian Westphal
  2022-02-10 15:29 ` [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket Florian Westphal
  2022-02-10 15:29 ` [PATCH mptcp-next 4/4] mptcp: remove per-address listening sockets Florian Westphal
  3 siblings, 0 replies; 24+ messages in thread
From: Florian Westphal @ 2022-02-10 15:29 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Split from the next patch to make core tcp changes more obvious:
add a dummy function that gets called after tcp socket demux came up
empty.

This will be used by mptcp to check if a tcp syn contains an mptcp
join option with a valid token (connection id).

If so, a hidden pernet mptcp listener socket is returned and packet
resumes normally.

This patch series does not cover timewait sockets so far.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/mptcp.h | 5 +++++
 net/ipv4/tcp_ipv4.c | 4 ++++
 net/ipv6/tcp_ipv6.c | 4 ++++
 3 files changed, 13 insertions(+)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 8b1afd6f5cc4..5ee422b56902 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -197,6 +197,10 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
 
 	return htonl(0u);
 }
+static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb)
+{
+	return NULL;
+}
 #else
 
 static inline void mptcp_init(void)
@@ -274,6 +278,7 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
 }
 
 static inline __be32 mptcp_reset_option(const struct sk_buff *skb)  { return htonl(0u); }
+static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb) { return NULL; }
 #endif /* CONFIG_MPTCP */
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6873f46fc8ba..6e6675a09443 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2140,6 +2140,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	sk = mptcp_handle_join(AF_INET, skb);
+	if (sk)
+		goto process;
+
 	tcp_v4_fill_cb(skb, iph, th);
 
 	if (tcp_checksum_complete(skb)) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0c648bf07f39..788040db8e9e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1782,6 +1782,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	sk = mptcp_handle_join(AF_INET6, skb);
+	if (sk)
+		goto process;
+
 	tcp_v6_fill_cb(skb, hdr, th);
 
 	if (tcp_checksum_complete(skb)) {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-10 15:29 [PATCH mptcp-next 0/4] mptcp: replace per-addr listener sockets Florian Westphal
  2022-02-10 15:29 ` [PATCH mptcp-next 1/4] mptcp: prefer ip address in syn skb instead of listen sk bound address Florian Westphal
  2022-02-10 15:29 ` [PATCH mptcp-next 2/4] tcp: add mptcp join demultiplex hooks Florian Westphal
@ 2022-02-10 15:29 ` Florian Westphal
  2022-02-11  2:03   ` Mat Martineau
                     ` (2 more replies)
  2022-02-10 15:29 ` [PATCH mptcp-next 4/4] mptcp: remove per-address listening sockets Florian Westphal
  3 siblings, 3 replies; 24+ messages in thread
From: Florian Westphal @ 2022-02-10 15:29 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Currently mptcp adds kernel-based listener socket for all
netlink-configured mptcp address endpoints.

This has caveats because kernel may interfere with unrelated programs
that use same address/port pairs.

RFC 8664 says:
 Demultiplexing subflow SYNs MUST be done using the token; this is
 unlike traditional TCP, where the destination port is used for
 demultiplexing SYN packets.  Once a subflow is set up, demultiplexing
 packets is done using the 5-tuple, as in traditional TCP.

This patch deviates from this in that it retrains the existing checks of
verifying the incoming requests destination vs. the list of announced
addresses.

This can be relaxed later if deemed appropriate.

The pernet 'listening' socket is not a listening socket from userspace
point of view, it is not part of any hashes and not bound to any address
or port.

TPROXY-like semantics apply: If tcp demux cannot find a socket, check
if the packet is a join request with a valid token.

If so, the pernet listener is returned and tcp processing resumes.
Otherwise, handling is intentical as if there is no socket.

This patch does not handle timewait sockets.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/mptcp.h  |  10 ++
 net/ipv6/tcp_ipv6.c  |  19 ++--
 net/mptcp/ctrl.c     | 214 ++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/protocol.c |   2 +-
 net/mptcp/protocol.h |   2 +-
 net/mptcp/subflow.c  |   3 +
 6 files changed, 236 insertions(+), 14 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 5ee422b56902..49c188b978e1 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -189,6 +189,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
 				  struct sk_buff *skb);
 
 __be32 mptcp_get_reset_option(const struct sk_buff *skb);
+struct sock *__mptcp_handle_join(int af, struct sk_buff *skb);
 
 static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
 {
@@ -199,6 +200,11 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
 }
 static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb)
 {
+	const struct tcphdr *th = tcp_hdr(skb);
+
+	if (th->syn && !th->ack && !th->rst && !th->fin)
+		return __mptcp_handle_join(af, skb);
+
 	return NULL;
 }
 #else
@@ -283,9 +289,13 @@ static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb) { retu
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 int mptcpv6_init(void);
+int mptcpv6_init_net(struct net *net);
+void mptcpv6_exit_net(struct net *net);
 void mptcpv6_handle_mapped(struct sock *sk, bool mapped);
 #elif IS_ENABLED(CONFIG_IPV6)
 static inline int mptcpv6_init(void) { return 0; }
+static inline int mptcpv6_init_net(struct net *net) { return 0; }
+static inline void mptcpv6_exit_net(struct net *net) { }
 static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
 #endif
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 788040db8e9e..3b8608d35dcd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2233,13 +2233,22 @@ static struct inet_protosw tcpv6_protosw = {
 
 static int __net_init tcpv6_net_init(struct net *net)
 {
-	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
-				    SOCK_RAW, IPPROTO_TCP, net);
+	int err = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+				       SOCK_RAW, IPPROTO_TCP, net);
+	if (err)
+		return err;
+
+	err = mptcpv6_init_net(net);
+	if (err)
+		inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+
+	return err;
 }
 
 static void __net_exit tcpv6_net_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+	mptcpv6_exit_net(net);
 }
 
 static struct pernet_operations tcpv6_net_ops = {
@@ -2264,15 +2273,9 @@ int __init tcpv6_init(void)
 	if (ret)
 		goto out_tcpv6_protosw;
 
-	ret = mptcpv6_init();
-	if (ret)
-		goto out_tcpv6_pernet_subsys;
-
 out:
 	return ret;
 
-out_tcpv6_pernet_subsys:
-	unregister_pernet_subsys(&tcpv6_net_ops);
 out_tcpv6_protosw:
 	inet6_unregister_protosw(&tcpv6_protosw);
 out_tcpv6_protocol:
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..bba345f092af 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -21,6 +21,12 @@ static int mptcp_pernet_id;
 static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
 #endif
 
+struct mptcp_join_sk {
+	struct sock *sk;
+	struct inet_bind_bucket *tb;
+	struct inet_bind_hashbucket head;
+};
+
 struct mptcp_pernet {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *ctl_table_hdr;
@@ -32,6 +38,18 @@ struct mptcp_pernet {
 	u8 checksum_enabled;
 	u8 allow_join_initial_addr_port;
 	u8 pm_type;
+
+	/* pernet listener to handle mptcp join requests
+	 * based on the mptcp token.
+	 *
+	 * Has to be pernet because tcp uses
+	 * sock_net(sk_listener) to obtain the net namespace for
+	 * the syn/ack route lookup.
+	 */
+	struct mptcp_join_sk join4;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	struct mptcp_join_sk join6;
+#endif
 };
 
 static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -185,13 +203,176 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
 
 #endif /* CONFIG_SYSCTL */
 
+struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
+{
+	struct mptcp_options_received mp_opt;
+	struct mptcp_pernet *pernet;
+	struct mptcp_sock *msk;
+	struct socket *ssock;
+	struct sock *lsk;
+	struct net *net;
+
+	/* paranoia check: don't allow 0 destination port,
+	 * else __inet_inherit_port will insert the child socket
+	 * into the phony hash slot of the pernet listener.
+	 */
+	if (tcp_hdr(skb)->dest == 0)
+		return NULL;
+
+	mptcp_get_options(skb, &mp_opt);
+
+	if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
+		return NULL;
+
+	net = dev_net(skb_dst(skb)->dev);
+	if (!mptcp_is_enabled(net))
+		return NULL;
+
+	/* RFC8684: If the token is unknown [..], the receiver will send
+	 * back a reset (RST) signal, analogous to an unknown port in TCP,
+	 * containing an MP_TCPRST option (Section 3.6) [..]
+	 */
+	msk = mptcp_token_get_sock(net, mp_opt.token);
+	if (!msk) {
+		struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
+
+		if (ext) {
+			memset(ext, 0, sizeof(*ext));
+			ext->reset_reason = MPTCP_RST_EMPTCP;
+		}
+		return NULL;
+	}
+
+	sock_put((struct sock *)msk);
+	pernet = mptcp_get_pernet(net);
+
+	switch (af) {
+	case AF_INET:
+		lsk = pernet->join4.sk;
+		break;
+	case AF_INET6:
+		lsk = pernet->join6.sk;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return NULL;
+	}
+
+	msk = mptcp_sk(lsk);
+	ssock = __mptcp_nmpc_socket(msk);
+	lsk = ssock->sk;
+	sock_hold(lsk);
+	return lsk;
+}
+
+static struct socket *mptcp_create_join_listen_socket(struct net *net, int af)
+{
+	struct socket *s, *ssock;
+	int err;
+
+	err = sock_create_kern(net, af, SOCK_STREAM, IPPROTO_MPTCP, &s);
+	if (err)
+		return ERR_PTR(err);
+
+	ssock = __mptcp_nmpc_socket(mptcp_sk(s->sk));
+	if (!ssock) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	ssock->sk->sk_max_ack_backlog = SOMAXCONN;
+	inet_sk_state_store(ssock->sk, TCP_LISTEN);
+
+	s->sk->sk_max_ack_backlog = SOMAXCONN;
+	inet_sk_state_store(s->sk, TCP_LISTEN);
+
+	s->sk->sk_net_refcnt = 1;
+	get_net_track(net, &s->sk->ns_tracker, GFP_KERNEL);
+	sock_inuse_add(net, 1);
+
+	return s;
+out:
+	sock_release(s);
+	return ERR_PTR(err);
+}
+
+static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_join_sk *join_sk)
+{
+	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(sk));
+	struct inet_hashinfo *table = ssock->sk->sk_prot->h.hashinfo;
+	struct inet_bind_bucket *tb;
+
+	spin_lock_init(&join_sk->head.lock);
+	INIT_HLIST_HEAD(&join_sk->head.chain);
+
+	/* Our "listen socket" isn't bound to any address or port.
+	 * Conceptually, SYN packet with mptcp join request are steered to
+	 * this pernet socket just like TPROXY steals arbitrary connection
+	 * requests to assign them to listening socket with different
+	 * address or port.
+	 *
+	 * The bind_bucket is needed for sake of __inet_inherit_port(),
+	 * so it can place the new child socket in the correct
+	 * bind_bucket slot.
+	 *
+	 * A phony head is used to hide this socket from normal sk loookup.
+	 */
+	tb = inet_bind_bucket_create(table->bind_bucket_cachep,
+				     net, &join_sk->head, 0, 0);
+	if (!tb)
+		return -ENOMEM;
+
+	inet_csk(ssock->sk)->icsk_bind_hash = tb;
+	return 0;
+}
+
 static int __net_init mptcp_net_init(struct net *net)
 {
 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+	struct socket *sock;
+	int err;
 
 	mptcp_pernet_set_defaults(pernet);
 
-	return mptcp_pernet_new_table(net, pernet);
+	err = mptcp_pernet_new_table(net, pernet);
+	if (err)
+		return err;
+
+	sock = mptcp_create_join_listen_socket(net, AF_INET);
+	if (IS_ERR(sock)) {
+		err = PTR_ERR(sock);
+		goto out_table;
+	}
+
+	err = mptcp_init_join_sk(net, sock->sk, &pernet->join4);
+	if (err) {
+		sock_release(sock);
+		goto out_table;
+	}
+
+	/* struct sock is still reachable via sock->sk_socket backpointer */
+	pernet->join4.sk = sock->sk;
+	return err;
+
+out_table:
+	if (!net_eq(net, &init_net))
+		mptcp_pernet_del_table(pernet);
+	return err;
+}
+
+static void __net_exit mptcp_exit_join_sk(struct mptcp_join_sk *jsk)
+{
+	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(jsk->sk));
+	struct inet_bind_bucket *tb;
+	struct inet_hashinfo *table;
+
+	table = ssock->sk->sk_prot->h.hashinfo;
+
+	tb = inet_csk(ssock->sk)->icsk_bind_hash;
+	inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
+
+	ssock = jsk->sk->sk_socket;
+	sock_release(ssock);
 }
 
 /* Note: the callback will only be called per extra netns */
@@ -200,6 +381,7 @@ static void __net_exit mptcp_net_exit(struct net *net)
 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
 
 	mptcp_pernet_del_table(pernet);
+	mptcp_exit_join_sk(&pernet->join4);
 }
 
 static struct pernet_operations mptcp_pernet_ops = {
@@ -219,12 +401,36 @@ void __init mptcp_init(void)
 }
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-int __init mptcpv6_init(void)
+int __net_init mptcpv6_init_net(struct net *net)
 {
+	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+	struct socket *sock;
 	int err;
 
-	err = mptcp_proto_v6_init();
+	if (net_eq(net, &init_net)) {
+		err = mptcp_proto_v6_init();
+		if (err)
+			return err;
+	}
 
-	return err;
+	sock = mptcp_create_join_listen_socket(net, AF_INET6);
+	if (IS_ERR(sock))
+		return PTR_ERR(sock);
+
+	err = mptcp_init_join_sk(net, sock->sk, &pernet->join6);
+	if (err) {
+		sock_release(sock);
+		return err;
+	}
+
+	pernet->join6.sk = sock->sk;
+	return 0;
+}
+
+void __net_exit mptcpv6_exit_net(struct net *net)
+{
+	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+
+	mptcp_exit_join_sk(&pernet->join6);
 }
 #endif
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3324e1c61576..980e6531bf4e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3776,7 +3776,7 @@ static struct inet_protosw mptcp_v6_protosw = {
 	.flags		= INET_PROTOSW_ICSK,
 };
 
-int __init mptcp_proto_v6_init(void)
+int __net_init mptcp_proto_v6_init(void)
 {
 	int err;
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 03e3880d274d..c6b2cf26bc88 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -647,7 +647,7 @@ static inline bool mptcp_has_another_subflow(struct sock *ssk)
 
 void __init mptcp_proto_init(void);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-int __init mptcp_proto_v6_init(void);
+int __net_init mptcp_proto_v6_init(void);
 #endif
 
 struct sock *mptcp_sk_clone(const struct sock *sk,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d50cf555ea40..d54c6685c036 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -116,6 +116,9 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
 
 static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
 {
+	if (inet_sk(sk)->inet_sport == 0)
+		return true;
+
 	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH mptcp-next 4/4] mptcp: remove per-address listening sockets
  2022-02-10 15:29 [PATCH mptcp-next 0/4] mptcp: replace per-addr listener sockets Florian Westphal
                   ` (2 preceding siblings ...)
  2022-02-10 15:29 ` [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket Florian Westphal
@ 2022-02-10 15:29 ` Florian Westphal
  3 siblings, 0 replies; 24+ messages in thread
From: Florian Westphal @ 2022-02-10 15:29 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Not required anymore, syn packets with a join requests are redirected
to pernet mptcp pseudo-listening socket.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/pm_netlink.c | 65 ------------------------------------------
 1 file changed, 65 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 1696f6fb9baa..4bc8904a25b6 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry {
 	struct mptcp_addr_info	addr;
 	u8			flags;
 	int			ifindex;
-	struct socket		*lsk;
 };
 
 struct mptcp_pm_add_entry {
@@ -954,57 +953,6 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
 	return ret;
 }
 
-static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
-					    struct mptcp_pm_addr_entry *entry)
-{
-	int addrlen = sizeof(struct sockaddr_in);
-	struct sockaddr_storage addr;
-	struct mptcp_sock *msk;
-	struct socket *ssock;
-	int backlog = 1024;
-	int err;
-
-	err = sock_create_kern(sock_net(sk), entry->addr.family,
-			       SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk);
-	if (err)
-		return err;
-
-	msk = mptcp_sk(entry->lsk->sk);
-	if (!msk) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	ssock = __mptcp_nmpc_socket(msk);
-	if (!ssock) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-	if (entry->addr.family == AF_INET6)
-		addrlen = sizeof(struct sockaddr_in6);
-#endif
-	err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
-	if (err) {
-		pr_warn("kernel_bind error, err=%d", err);
-		goto out;
-	}
-
-	err = kernel_listen(ssock, backlog);
-	if (err) {
-		pr_warn("kernel_listen error, err=%d", err);
-		goto out;
-	}
-
-	return 0;
-
-out:
-	sock_release(entry->lsk);
-	return err;
-}
-
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 {
 	struct mptcp_pm_addr_entry *entry;
@@ -1050,7 +998,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 	entry->addr.port = 0;
 	entry->ifindex = 0;
 	entry->flags = 0;
-	entry->lsk = NULL;
 	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
 	if (ret < 0)
 		kfree(entry);
@@ -1258,19 +1205,9 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	*entry = addr;
-	if (entry->addr.port) {
-		ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
-		if (ret) {
-			GENL_SET_ERR_MSG(info, "create listen socket error");
-			kfree(entry);
-			return ret;
-		}
-	}
 	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
 	if (ret < 0) {
 		GENL_SET_ERR_MSG(info, "too many addresses or duplicate one");
-		if (entry->lsk)
-			sock_release(entry->lsk);
 		kfree(entry);
 		return ret;
 	}
@@ -1375,8 +1312,6 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
 /* caller must ensure the RCU grace period is already elapsed */
 static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
 {
-	if (entry->lsk)
-		sock_release(entry->lsk);
 	kfree(entry);
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-10 15:29 ` [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket Florian Westphal
@ 2022-02-11  2:03   ` Mat Martineau
  2022-02-11 11:21     ` Paolo Abeni
  2022-02-12  0:08     ` Florian Westphal
  2022-02-11 11:03   ` Paolo Abeni
  2022-02-11 11:12   ` Matthieu Baerts
  2 siblings, 2 replies; 24+ messages in thread
From: Mat Martineau @ 2022-02-11  2:03 UTC (permalink / raw)
  To: Florian Westphal; +Cc: mptcp

On Thu, 10 Feb 2022, Florian Westphal wrote:

> Currently mptcp adds kernel-based listener socket for all
> netlink-configured mptcp address endpoints.
>
> This has caveats because kernel may interfere with unrelated programs
> that use same address/port pairs.
>

It looks like they still interfere with each other, but now in the 
opposite way: TCP listeners can now be created that interfere with 
MP_JOINs (and the MPTCP side loses).

Since mptcp_handle_join() is only called if the listener lookup fails, if 
a TCP listen socket has been created for an address & port advertised by 
MPTCP, that TCP listener will be looked up, process the SYN, and send a 
regular TCP SYN/ACK. The peer will then reject it due to lack of correct 
MPTCP options.

Seems like a few more TCP changes are needed to handle this listener 
collision well for both TCP and MPTCP, and without too much overhead. Is 
it too expensive to look for MPTCP options in every incoming TCP SYN 
header? Or to have the MPTCP PM code setting a "check for MP_JOIN" bit on 
TCP listener sockets that match advertised addresses?


-Mat

> RFC 8664 says:
> Demultiplexing subflow SYNs MUST be done using the token; this is
> unlike traditional TCP, where the destination port is used for
> demultiplexing SYN packets.  Once a subflow is set up, demultiplexing
> packets is done using the 5-tuple, as in traditional TCP.
>
> This patch deviates from this in that it retrains the existing checks of
> verifying the incoming requests destination vs. the list of announced
> addresses.
>
> This can be relaxed later if deemed appropriate.
>
> The pernet 'listening' socket is not a listening socket from userspace
> point of view, it is not part of any hashes and not bound to any address
> or port.
>
> TPROXY-like semantics apply: If tcp demux cannot find a socket, check
> if the packet is a join request with a valid token.
>
> If so, the pernet listener is returned and tcp processing resumes.
> Otherwise, handling is intentical as if there is no socket.
>
> This patch does not handle timewait sockets.
>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
> include/net/mptcp.h  |  10 ++
> net/ipv6/tcp_ipv6.c  |  19 ++--
> net/mptcp/ctrl.c     | 214 ++++++++++++++++++++++++++++++++++++++++++-
> net/mptcp/protocol.c |   2 +-
> net/mptcp/protocol.h |   2 +-
> net/mptcp/subflow.c  |   3 +
> 6 files changed, 236 insertions(+), 14 deletions(-)
>
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index 5ee422b56902..49c188b978e1 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -189,6 +189,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
> 				  struct sk_buff *skb);
>
> __be32 mptcp_get_reset_option(const struct sk_buff *skb);
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb);
>
> static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
> {
> @@ -199,6 +200,11 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
> }
> static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb)
> {
> +	const struct tcphdr *th = tcp_hdr(skb);
> +
> +	if (th->syn && !th->ack && !th->rst && !th->fin)
> +		return __mptcp_handle_join(af, skb);
> +
> 	return NULL;
> }
> #else
> @@ -283,9 +289,13 @@ static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb) { retu
>
> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> int mptcpv6_init(void);
> +int mptcpv6_init_net(struct net *net);
> +void mptcpv6_exit_net(struct net *net);
> void mptcpv6_handle_mapped(struct sock *sk, bool mapped);
> #elif IS_ENABLED(CONFIG_IPV6)
> static inline int mptcpv6_init(void) { return 0; }
> +static inline int mptcpv6_init_net(struct net *net) { return 0; }
> +static inline void mptcpv6_exit_net(struct net *net) { }
> static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
> #endif
>
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 788040db8e9e..3b8608d35dcd 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -2233,13 +2233,22 @@ static struct inet_protosw tcpv6_protosw = {
>
> static int __net_init tcpv6_net_init(struct net *net)
> {
> -	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
> -				    SOCK_RAW, IPPROTO_TCP, net);
> +	int err = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
> +				       SOCK_RAW, IPPROTO_TCP, net);
> +	if (err)
> +		return err;
> +
> +	err = mptcpv6_init_net(net);
> +	if (err)
> +		inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> +
> +	return err;
> }
>
> static void __net_exit tcpv6_net_exit(struct net *net)
> {
> 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> +	mptcpv6_exit_net(net);
> }
>
> static struct pernet_operations tcpv6_net_ops = {
> @@ -2264,15 +2273,9 @@ int __init tcpv6_init(void)
> 	if (ret)
> 		goto out_tcpv6_protosw;
>
> -	ret = mptcpv6_init();
> -	if (ret)
> -		goto out_tcpv6_pernet_subsys;
> -
> out:
> 	return ret;
>
> -out_tcpv6_pernet_subsys:
> -	unregister_pernet_subsys(&tcpv6_net_ops);
> out_tcpv6_protosw:
> 	inet6_unregister_protosw(&tcpv6_protosw);
> out_tcpv6_protocol:
> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
> index ae20b7d92e28..bba345f092af 100644
> --- a/net/mptcp/ctrl.c
> +++ b/net/mptcp/ctrl.c
> @@ -21,6 +21,12 @@ static int mptcp_pernet_id;
> static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
> #endif
>
> +struct mptcp_join_sk {
> +	struct sock *sk;
> +	struct inet_bind_bucket *tb;
> +	struct inet_bind_hashbucket head;
> +};
> +
> struct mptcp_pernet {
> #ifdef CONFIG_SYSCTL
> 	struct ctl_table_header *ctl_table_hdr;
> @@ -32,6 +38,18 @@ struct mptcp_pernet {
> 	u8 checksum_enabled;
> 	u8 allow_join_initial_addr_port;
> 	u8 pm_type;
> +
> +	/* pernet listener to handle mptcp join requests
> +	 * based on the mptcp token.
> +	 *
> +	 * Has to be pernet because tcp uses
> +	 * sock_net(sk_listener) to obtain the net namespace for
> +	 * the syn/ack route lookup.
> +	 */
> +	struct mptcp_join_sk join4;
> +#if IS_ENABLED(CONFIG_MPTCP_IPV6)
> +	struct mptcp_join_sk join6;
> +#endif
> };
>
> static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
> @@ -185,13 +203,176 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
>
> #endif /* CONFIG_SYSCTL */
>
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
> +{
> +	struct mptcp_options_received mp_opt;
> +	struct mptcp_pernet *pernet;
> +	struct mptcp_sock *msk;
> +	struct socket *ssock;
> +	struct sock *lsk;
> +	struct net *net;
> +
> +	/* paranoia check: don't allow 0 destination port,
> +	 * else __inet_inherit_port will insert the child socket
> +	 * into the phony hash slot of the pernet listener.
> +	 */
> +	if (tcp_hdr(skb)->dest == 0)
> +		return NULL;
> +
> +	mptcp_get_options(skb, &mp_opt);
> +
> +	if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
> +		return NULL;
> +
> +	net = dev_net(skb_dst(skb)->dev);
> +	if (!mptcp_is_enabled(net))
> +		return NULL;
> +
> +	/* RFC8684: If the token is unknown [..], the receiver will send
> +	 * back a reset (RST) signal, analogous to an unknown port in TCP,
> +	 * containing an MP_TCPRST option (Section 3.6) [..]
> +	 */
> +	msk = mptcp_token_get_sock(net, mp_opt.token);
> +	if (!msk) {
> +		struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
> +
> +		if (ext) {
> +			memset(ext, 0, sizeof(*ext));
> +			ext->reset_reason = MPTCP_RST_EMPTCP;
> +		}
> +		return NULL;
> +	}
> +
> +	sock_put((struct sock *)msk);
> +	pernet = mptcp_get_pernet(net);
> +
> +	switch (af) {
> +	case AF_INET:
> +		lsk = pernet->join4.sk;
> +		break;
> +	case AF_INET6:
> +		lsk = pernet->join6.sk;
> +		break;
> +	default:
> +		WARN_ON_ONCE(1);
> +		return NULL;
> +	}
> +
> +	msk = mptcp_sk(lsk);
> +	ssock = __mptcp_nmpc_socket(msk);
> +	lsk = ssock->sk;
> +	sock_hold(lsk);
> +	return lsk;
> +}
> +
> +static struct socket *mptcp_create_join_listen_socket(struct net *net, int af)
> +{
> +	struct socket *s, *ssock;
> +	int err;
> +
> +	err = sock_create_kern(net, af, SOCK_STREAM, IPPROTO_MPTCP, &s);
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	ssock = __mptcp_nmpc_socket(mptcp_sk(s->sk));
> +	if (!ssock) {
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	ssock->sk->sk_max_ack_backlog = SOMAXCONN;
> +	inet_sk_state_store(ssock->sk, TCP_LISTEN);
> +
> +	s->sk->sk_max_ack_backlog = SOMAXCONN;
> +	inet_sk_state_store(s->sk, TCP_LISTEN);
> +
> +	s->sk->sk_net_refcnt = 1;
> +	get_net_track(net, &s->sk->ns_tracker, GFP_KERNEL);
> +	sock_inuse_add(net, 1);
> +
> +	return s;
> +out:
> +	sock_release(s);
> +	return ERR_PTR(err);
> +}
> +
> +static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_join_sk *join_sk)
> +{
> +	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(sk));
> +	struct inet_hashinfo *table = ssock->sk->sk_prot->h.hashinfo;
> +	struct inet_bind_bucket *tb;
> +
> +	spin_lock_init(&join_sk->head.lock);
> +	INIT_HLIST_HEAD(&join_sk->head.chain);
> +
> +	/* Our "listen socket" isn't bound to any address or port.
> +	 * Conceptually, SYN packet with mptcp join request are steered to
> +	 * this pernet socket just like TPROXY steals arbitrary connection
> +	 * requests to assign them to listening socket with different
> +	 * address or port.
> +	 *
> +	 * The bind_bucket is needed for sake of __inet_inherit_port(),
> +	 * so it can place the new child socket in the correct
> +	 * bind_bucket slot.
> +	 *
> +	 * A phony head is used to hide this socket from normal sk loookup.
> +	 */
> +	tb = inet_bind_bucket_create(table->bind_bucket_cachep,
> +				     net, &join_sk->head, 0, 0);
> +	if (!tb)
> +		return -ENOMEM;
> +
> +	inet_csk(ssock->sk)->icsk_bind_hash = tb;
> +	return 0;
> +}
> +
> static int __net_init mptcp_net_init(struct net *net)
> {
> 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +	struct socket *sock;
> +	int err;
>
> 	mptcp_pernet_set_defaults(pernet);
>
> -	return mptcp_pernet_new_table(net, pernet);
> +	err = mptcp_pernet_new_table(net, pernet);
> +	if (err)
> +		return err;
> +
> +	sock = mptcp_create_join_listen_socket(net, AF_INET);
> +	if (IS_ERR(sock)) {
> +		err = PTR_ERR(sock);
> +		goto out_table;
> +	}
> +
> +	err = mptcp_init_join_sk(net, sock->sk, &pernet->join4);
> +	if (err) {
> +		sock_release(sock);
> +		goto out_table;
> +	}
> +
> +	/* struct sock is still reachable via sock->sk_socket backpointer */
> +	pernet->join4.sk = sock->sk;
> +	return err;
> +
> +out_table:
> +	if (!net_eq(net, &init_net))
> +		mptcp_pernet_del_table(pernet);
> +	return err;
> +}
> +
> +static void __net_exit mptcp_exit_join_sk(struct mptcp_join_sk *jsk)
> +{
> +	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(jsk->sk));
> +	struct inet_bind_bucket *tb;
> +	struct inet_hashinfo *table;
> +
> +	table = ssock->sk->sk_prot->h.hashinfo;
> +
> +	tb = inet_csk(ssock->sk)->icsk_bind_hash;
> +	inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
> +
> +	ssock = jsk->sk->sk_socket;
> +	sock_release(ssock);
> }
>
> /* Note: the callback will only be called per extra netns */
> @@ -200,6 +381,7 @@ static void __net_exit mptcp_net_exit(struct net *net)
> 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
>
> 	mptcp_pernet_del_table(pernet);
> +	mptcp_exit_join_sk(&pernet->join4);
> }
>
> static struct pernet_operations mptcp_pernet_ops = {
> @@ -219,12 +401,36 @@ void __init mptcp_init(void)
> }
>
> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> -int __init mptcpv6_init(void)
> +int __net_init mptcpv6_init_net(struct net *net)
> {
> +	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +	struct socket *sock;
> 	int err;
>
> -	err = mptcp_proto_v6_init();
> +	if (net_eq(net, &init_net)) {
> +		err = mptcp_proto_v6_init();
> +		if (err)
> +			return err;
> +	}
>
> -	return err;
> +	sock = mptcp_create_join_listen_socket(net, AF_INET6);
> +	if (IS_ERR(sock))
> +		return PTR_ERR(sock);
> +
> +	err = mptcp_init_join_sk(net, sock->sk, &pernet->join6);
> +	if (err) {
> +		sock_release(sock);
> +		return err;
> +	}
> +
> +	pernet->join6.sk = sock->sk;
> +	return 0;
> +}
> +
> +void __net_exit mptcpv6_exit_net(struct net *net)
> +{
> +	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +
> +	mptcp_exit_join_sk(&pernet->join6);
> }
> #endif
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 3324e1c61576..980e6531bf4e 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -3776,7 +3776,7 @@ static struct inet_protosw mptcp_v6_protosw = {
> 	.flags		= INET_PROTOSW_ICSK,
> };
>
> -int __init mptcp_proto_v6_init(void)
> +int __net_init mptcp_proto_v6_init(void)
> {
> 	int err;
>
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 03e3880d274d..c6b2cf26bc88 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -647,7 +647,7 @@ static inline bool mptcp_has_another_subflow(struct sock *ssk)
>
> void __init mptcp_proto_init(void);
> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> -int __init mptcp_proto_v6_init(void);
> +int __net_init mptcp_proto_v6_init(void);
> #endif
>
> struct sock *mptcp_sk_clone(const struct sock *sk,
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index d50cf555ea40..d54c6685c036 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -116,6 +116,9 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
>
> static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
> {
> +	if (inet_sk(sk)->inet_sport == 0)
> +		return true;
> +
> 	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
> }
>
> -- 
> 2.34.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 1/4] mptcp: prefer ip address in syn skb instead of listen sk bound address
  2022-02-10 15:29 ` [PATCH mptcp-next 1/4] mptcp: prefer ip address in syn skb instead of listen sk bound address Florian Westphal
@ 2022-02-11 10:34   ` Paolo Abeni
  0 siblings, 0 replies; 24+ messages in thread
From: Paolo Abeni @ 2022-02-11 10:34 UTC (permalink / raw)
  To: Florian Westphal, mptcp

On Thu, 2022-02-10 at 16:29 +0100, Florian Westphal wrote:
> Once we change mptcp to use tproxy-like scheme to steer mptcp join
> requests to a special pernet socket, the 'sk bound address' becomes
> meaningless because it will never be identical to the tcp dport/ip daddr
> of the on-wire packet.
> 
> Prepare for this: pass the skbuff and use the packet data instead of
> the address the listener socket is bound to.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/mptcp/pm_netlink.c | 17 +++++++++++++++--
>  net/mptcp/protocol.h   |  2 +-
>  net/mptcp/subflow.c    |  5 +++--
>  3 files changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
> index 98b485406afa..1696f6fb9baa 100644
> --- a/net/mptcp/pm_netlink.c
> +++ b/net/mptcp/pm_netlink.c
> @@ -279,13 +279,26 @@ mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
>  	return NULL;
>  }
>  
> -bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
> +static void skb_address(const struct sk_buff *skb,
> +			struct mptcp_addr_info *addr)

Very minor nit: I would rename the above helper to something more
expressive, alike skb_fetch_src_address() or the like.


/P


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-10 15:29 ` [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket Florian Westphal
  2022-02-11  2:03   ` Mat Martineau
@ 2022-02-11 11:03   ` Paolo Abeni
  2022-02-12  0:12     ` Florian Westphal
  2022-02-11 11:12   ` Matthieu Baerts
  2 siblings, 1 reply; 24+ messages in thread
From: Paolo Abeni @ 2022-02-11 11:03 UTC (permalink / raw)
  To: Florian Westphal, mptcp

On Thu, 2022-02-10 at 16:29 +0100, Florian Westphal wrote:
> Currently mptcp adds kernel-based listener socket for all
> netlink-configured mptcp address endpoints.
> 
> This has caveats because kernel may interfere with unrelated programs
> that use same address/port pairs.
> 
> RFC 8664 says:
>  Demultiplexing subflow SYNs MUST be done using the token; this is
>  unlike traditional TCP, where the destination port is used for
>  demultiplexing SYN packets.  Once a subflow is set up, demultiplexing
>  packets is done using the 5-tuple, as in traditional TCP.
> 
> This patch deviates from this in that it retrains the existing checks of
> verifying the incoming requests destination vs. the list of announced
> addresses.
> 
> This can be relaxed later if deemed appropriate.
> 
> The pernet 'listening' socket is not a listening socket from userspace
> point of view, it is not part of any hashes and not bound to any address
> or port.
> 
> TPROXY-like semantics apply: If tcp demux cannot find a socket, check
> if the packet is a join request with a valid token.
> 
> If so, the pernet listener is returned and tcp processing resumes.
> Otherwise, handling is intentical as if there is no socket.
> 
> This patch does not handle timewait sockets.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  include/net/mptcp.h  |  10 ++
>  net/ipv6/tcp_ipv6.c  |  19 ++--
>  net/mptcp/ctrl.c     | 214 ++++++++++++++++++++++++++++++++++++++++++-
>  net/mptcp/protocol.c |   2 +-
>  net/mptcp/protocol.h |   2 +-
>  net/mptcp/subflow.c  |   3 +
>  6 files changed, 236 insertions(+), 14 deletions(-)
> 
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index 5ee422b56902..49c188b978e1 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -189,6 +189,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
>  				  struct sk_buff *skb);
>  
>  __be32 mptcp_get_reset_option(const struct sk_buff *skb);
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb);
>  
>  static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
>  {
> @@ -199,6 +200,11 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
>  }
>  static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb)
>  {
> +	const struct tcphdr *th = tcp_hdr(skb);
> +
> +	if (th->syn && !th->ack && !th->rst && !th->fin)
> +		return __mptcp_handle_join(af, skb);
> +
>  	return NULL;
>  }
>  #else
> @@ -283,9 +289,13 @@ static inline struct sock *mptcp_handle_join(int af, struct sk_buff *skb) { retu
>  
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>  int mptcpv6_init(void);
> +int mptcpv6_init_net(struct net *net);
> +void mptcpv6_exit_net(struct net *net);
>  void mptcpv6_handle_mapped(struct sock *sk, bool mapped);
>  #elif IS_ENABLED(CONFIG_IPV6)
>  static inline int mptcpv6_init(void) { return 0; }
> +static inline int mptcpv6_init_net(struct net *net) { return 0; }
> +static inline void mptcpv6_exit_net(struct net *net) { }
>  static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
>  #endif
>  
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 788040db8e9e..3b8608d35dcd 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -2233,13 +2233,22 @@ static struct inet_protosw tcpv6_protosw = {
>  
>  static int __net_init tcpv6_net_init(struct net *net)
>  {
> -	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
> -				    SOCK_RAW, IPPROTO_TCP, net);
> +	int err = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
> +				       SOCK_RAW, IPPROTO_TCP, net);
> +	if (err)
> +		return err;
> +
> +	err = mptcpv6_init_net(net);
> +	if (err)
> +		inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> +
> +	return err;
>  }
>  
>  static void __net_exit tcpv6_net_exit(struct net *net)
>  {
>  	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> +	mptcpv6_exit_net(net);
>  }
>  
>  static struct pernet_operations tcpv6_net_ops = {
> @@ -2264,15 +2273,9 @@ int __init tcpv6_init(void)
>  	if (ret)
>  		goto out_tcpv6_protosw;
>  
> -	ret = mptcpv6_init();
> -	if (ret)
> -		goto out_tcpv6_pernet_subsys;
> -
>  out:
>  	return ret;
>  
> -out_tcpv6_pernet_subsys:
> -	unregister_pernet_subsys(&tcpv6_net_ops);
>  out_tcpv6_protosw:
>  	inet6_unregister_protosw(&tcpv6_protosw);
>  out_tcpv6_protocol:
> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
> index ae20b7d92e28..bba345f092af 100644
> --- a/net/mptcp/ctrl.c
> +++ b/net/mptcp/ctrl.c
> @@ -21,6 +21,12 @@ static int mptcp_pernet_id;
>  static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
>  #endif
>  
> +struct mptcp_join_sk {
> +	struct sock *sk;
> +	struct inet_bind_bucket *tb;
> +	struct inet_bind_hashbucket head;
> +};
> +
>  struct mptcp_pernet {
>  #ifdef CONFIG_SYSCTL
>  	struct ctl_table_header *ctl_table_hdr;
> @@ -32,6 +38,18 @@ struct mptcp_pernet {
>  	u8 checksum_enabled;
>  	u8 allow_join_initial_addr_port;
>  	u8 pm_type;
> +
> +	/* pernet listener to handle mptcp join requests
> +	 * based on the mptcp token.
> +	 *
> +	 * Has to be pernet because tcp uses
> +	 * sock_net(sk_listener) to obtain the net namespace for
> +	 * the syn/ack route lookup.
> +	 */

A possible alternative would be proving a __tcp_conn_request() variant
which uses an exiplicit 'net' argument. tcp_conn_request() could be an
inline on top of the above.

Or we can use a global per-cpu set of "listeners", setting the net
field just before calling tcp_conn_request.

> +	struct mptcp_join_sk join4;
> +#if IS_ENABLED(CONFIG_MPTCP_IPV6)
> +	struct mptcp_join_sk join6;
> +#endif
>  };
>  
>  static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
> @@ -185,13 +203,176 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
>  
>  #endif /* CONFIG_SYSCTL */
>  
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
> +{
> +	struct mptcp_options_received mp_opt;
> +	struct mptcp_pernet *pernet;
> +	struct mptcp_sock *msk;
> +	struct socket *ssock;
> +	struct sock *lsk;
> +	struct net *net;
> +
> +	/* paranoia check: don't allow 0 destination port,
> +	 * else __inet_inherit_port will insert the child socket
> +	 * into the phony hash slot of the pernet listener.
> +	 */
> +	if (tcp_hdr(skb)->dest == 0)
> +		return NULL;
> +
> +	mptcp_get_options(skb, &mp_opt);
> +
> +	if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
> +		return NULL;
> +
> +	net = dev_net(skb_dst(skb)->dev);
> +	if (!mptcp_is_enabled(net))
> +		return NULL;
> +
> +	/* RFC8684: If the token is unknown [..], the receiver will send
> +	 * back a reset (RST) signal, analogous to an unknown port in TCP,
> +	 * containing an MP_TCPRST option (Section 3.6) [..]
> +	 */
> +	msk = mptcp_token_get_sock(net, mp_opt.token);
> +	if (!msk) {
> +		struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
> +
> +		if (ext) {
> +			memset(ext, 0, sizeof(*ext));
> +			ext->reset_reason = MPTCP_RST_EMPTCP;
> +		}
> +		return NULL;
> +	}
> +
> +	sock_put((struct sock *)msk);

This should be called under the RCU lock, right? we could have
__mptcp_token_lookup_sock variant that does not touches the msk
reference count.

> +	pernet = mptcp_get_pernet(net);
> +
> +	switch (af) {
> +	case AF_INET:
> +		lsk = pernet->join4.sk;
> +		break;
> +	case AF_INET6:
> +		lsk = pernet->join6.sk;
> +		break;
> +	default:
> +		WARN_ON_ONCE(1);
> +		return NULL;
> +	}
> +
> +	msk = mptcp_sk(lsk);
> +	ssock = __mptcp_nmpc_socket(msk);
> +	lsk = ssock->sk;
> +	sock_hold(lsk);

If I read correctly, at this point 'refcounted' should be 'false' in
the caller (either tcp_v4_rcv or tcp_v6_rcv), so we don't need to
acquire a reference to lsk ?!?

/P


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-10 15:29 ` [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket Florian Westphal
  2022-02-11  2:03   ` Mat Martineau
  2022-02-11 11:03   ` Paolo Abeni
@ 2022-02-11 11:12   ` Matthieu Baerts
  2022-02-12  0:13     ` Florian Westphal
  2 siblings, 1 reply; 24+ messages in thread
From: Matthieu Baerts @ 2022-02-11 11:12 UTC (permalink / raw)
  To: Florian Westphal, mptcp

Hi Florian

On 10/02/2022 16:29, Florian Westphal wrote:
> Currently mptcp adds kernel-based listener socket for all
> netlink-configured mptcp address endpoints.
> 
> This has caveats because kernel may interfere with unrelated programs
> that use same address/port pairs.

Thank you for working on that!

> RFC 8664 says:

Very minor nit: s/8664/8684/

Cheers
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-11  2:03   ` Mat Martineau
@ 2022-02-11 11:21     ` Paolo Abeni
  2022-02-12  0:08     ` Florian Westphal
  1 sibling, 0 replies; 24+ messages in thread
From: Paolo Abeni @ 2022-02-11 11:21 UTC (permalink / raw)
  To: Mat Martineau, Florian Westphal; +Cc: mptcp

On Thu, 2022-02-10 at 18:03 -0800, Mat Martineau wrote:
> On Thu, 10 Feb 2022, Florian Westphal wrote:
> 
> > Currently mptcp adds kernel-based listener socket for all
> > netlink-configured mptcp address endpoints.
> > 
> > This has caveats because kernel may interfere with unrelated programs
> > that use same address/port pairs.
> > 
> 
> It looks like they still interfere with each other, but now in the 
> opposite way: TCP listeners can now be created that interfere with 
> MP_JOINs (and the MPTCP side loses).
> 
> Since mptcp_handle_join() is only called if the listener lookup fails, if 
> a TCP listen socket has been created for an address & port advertised by 
> MPTCP, that TCP listener will be looked up, process the SYN, and send a 
> regular TCP SYN/ACK. The peer will then reject it due to lack of correct 
> MPTCP options.

Uhm... I think the above could only happen with some misconfiguration.
e.g. the user/admin runs on an announced address, on the same port of
the MPTCP service (or of the mptcp endpoint), a different service.

IMHO one important point is that the behavior is consistent: TCP will
be always preferred, with no races - modulo bugs - and user/admin will
be notified of the misconfiguration by the fallback. 

It should be also quite easy to clearly document.

> Seems like a few more TCP changes are needed to handle this listener 
> collision well for both TCP and MPTCP, and without too much overhead. Is 
> it too expensive to look for MPTCP options in every incoming TCP SYN 
> header? Or to have the MPTCP PM code setting a "check for MP_JOIN" bit on 
> TCP listener sockets that match advertised addresses?

I'm all for the latter option. We can either walk the endpoint list or
maintain an additional small hash to make the lookup faster.

/P


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-11  2:03   ` Mat Martineau
  2022-02-11 11:21     ` Paolo Abeni
@ 2022-02-12  0:08     ` Florian Westphal
  1 sibling, 0 replies; 24+ messages in thread
From: Florian Westphal @ 2022-02-12  0:08 UTC (permalink / raw)
  To: Mat Martineau; +Cc: Florian Westphal, mptcp

Mat Martineau <mathew.j.martineau@linux.intel.com> wrote:
> On Thu, 10 Feb 2022, Florian Westphal wrote:
> 
> > Currently mptcp adds kernel-based listener socket for all
> > netlink-configured mptcp address endpoints.
> > 
> > This has caveats because kernel may interfere with unrelated programs
> > that use same address/port pairs.
> > 
> 
> It looks like they still interfere with each other, but now in the opposite
> way: TCP listeners can now be created that interfere with MP_JOINs (and the
> MPTCP side loses).

Yep, I'm not sure its a good idea to announce random addresses:ports.

> Since mptcp_handle_join() is only called if the listener lookup fails, if a
> TCP listen socket has been created for an address & port advertised by
> MPTCP, that TCP listener will be looked up, process the SYN, and send a
> regular TCP SYN/ACK. The peer will then reject it due to lack of correct
> MPTCP options.

Correct.  Its easily fixable by doing mptcp_handle_join() before the
lookup, but that means a new conditional in tcp fastpath.

I'm not sure TCP maintainers will eat that, but its certainly doable.
Idea would be to have __mptcp_handle_join() make a listen socket lookup
and then assign 'skb->sk = magic_listen' if there is none, just like TPROXY.

> Seems like a few more TCP changes are needed to handle this listener
> collision well for both TCP and MPTCP, and without too much overhead. Is it
> too expensive to look for MPTCP options in every incoming TCP SYN header?

I'm not worried about that, Its just the extra
if (th->syn && !th->ack && ...
conditional inside mptcp_handle_join().

ATM the extra conditional is hit only in the error path, not for every
packet.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-11 11:03   ` Paolo Abeni
@ 2022-02-12  0:12     ` Florian Westphal
  0 siblings, 0 replies; 24+ messages in thread
From: Florian Westphal @ 2022-02-12  0:12 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: Florian Westphal, mptcp

Paolo Abeni <pabeni@redhat.com> wrote:
> On Thu, 2022-02-10 at 16:29 +0100, Florian Westphal wrote:
> > +
> > +	/* pernet listener to handle mptcp join requests
> > +	 * based on the mptcp token.
> > +	 *
> > +	 * Has to be pernet because tcp uses
> > +	 * sock_net(sk_listener) to obtain the net namespace for
> > +	 * the syn/ack route lookup.
> > +	 */
> 
> A possible alternative would be proving a __tcp_conn_request() variant
> which uses an exiplicit 'net' argument. tcp_conn_request() could be an
> inline on top of the above.

Yes, we could change tcp stack to allow explicit net arg.

> Or we can use a global per-cpu set of "listeners", setting the net
> field just before calling tcp_conn_request.

I would prefer to avoid that, might get messy at least for RT folks?
If you don't want pernet, then I think additional tcp surgery is better.

> > +	sock_put((struct sock *)msk);
> 
> This should be called under the RCU lock, right? we could have
> __mptcp_token_lookup_sock variant that does not touches the msk
> reference count.

Yes, we should make token api netns safe and then use the 'exist'
variant which takes no reference count.

> > +	msk = mptcp_sk(lsk);
> > +	ssock = __mptcp_nmpc_socket(msk);
> > +	lsk = ssock->sk;
> > +	sock_hold(lsk);
> 
> If I read correctly, at this point 'refcounted' should be 'false' in
> the caller (either tcp_v4_rcv or tcp_v6_rcv), so we don't need to
> acquire a reference to lsk ?!?

Yes, the refcount increase is wrong for sure, its not needed unless
we go for 'skb->sk = lsk' route.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-11 11:12   ` Matthieu Baerts
@ 2022-02-12  0:13     ` Florian Westphal
  0 siblings, 0 replies; 24+ messages in thread
From: Florian Westphal @ 2022-02-12  0:13 UTC (permalink / raw)
  To: Matthieu Baerts; +Cc: Florian Westphal, mptcp

Matthieu Baerts <matthieu.baerts@tessares.net> wrote:
> On 10/02/2022 16:29, Florian Westphal wrote:
> > RFC 8664 says:
> 
> Very minor nit: s/8664/8684/

Ugh.  I'll fix that up, thanks for noticing.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-09 21:37             ` Florian Westphal
  2022-03-09 23:40               ` Kishen Maloor
@ 2022-03-11  1:16               ` Mat Martineau
  1 sibling, 0 replies; 24+ messages in thread
From: Mat Martineau @ 2022-03-11  1:16 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Kishen Maloor, mptcp

On Wed, 9 Mar 2022, Florian Westphal wrote:

> Kishen Maloor <kishen.maloor@intel.com> wrote:
>> On 3/9/22 4:53 AM, Florian Westphal wrote:
>>> Kishen Maloor <kishen.maloor@intel.com> wrote:
>>>>>> Over a newly established MPTCP connection following listen(s1), the PM can issue an
>>>>>> ADD_ADDR with B. In light of this change there would be no listener created for B.
>>>>>> But if the remote endpoint immediately established a subflow in response (to the
>>>>>> ADD_ADDR), then that would create a subflow (connection) socket at B.
>>>>>> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).
>>>>>
>>>>> Why would that fail? You can bind x:y even if there is an established
>>>>> connection from x:y to q:r.
>>>>
>>>> If I establish an MPTCP connection using mptcp_connect individually as
>>>> Client and Server, then I am unable to bind a 3rd (new) Server process at the Client's
>>>> addr+port [1]. Why is this the case?
>>>
>>> Whats [1]?
>>> I suspect this patch series needs following addition in patch 3:
>>>
>>> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
>>> --- a/net/mptcp/ctrl.c
>>> +++ b/net/mptcp/ctrl.c
>>> @@ -337,6 +337,8 @@ static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_joi
>>>  	if (!tb)
>>>  		return -ENOMEM;
>>>
>>> +	ssock->sk->sk_reuse = 1;
>>> +	ssock->sk->sk_reuseport = 1;
>>>  	inet_csk(ssock->sk)->icsk_bind_hash = tb;
>>>  	return 0;
>>>  }
>>>
>>> After that, follwing sequence should work:
>>>
>>> 1. bind(0.0.0.0, p1) // listen, accept etc, initial subflow established
>>> 2. announce p2
>>> 3. receive join on addr, p2
>>> 4. bind(0.0.0.0, p2)
>>>
>>> 4) should work because sk used for endpoint in 3) has reuse flag set
>>> and is not in listen state.
>>>
>>> cf. include/net/inet_hashtables.h, line 47:
>>> 2) If all sockets have sk->sk_reuse set, and none of them
>>>    TCP_LISTEN state, the port may be shared.
>>>
>>
>> Wouldn't 4) fail if the socket being bound at the time does not have the SO_REUSExxx flag(s) set?
>
> Yes, it needs SO_REUSEADDR set.
>
>> If so, that would be application level thing and in that situation we don't have a way to
>> avoid a race.  Whereas when we require an explicit listener, we could have the kernel take a step
>> back (and not create a listener) to break the race.
>
> Uh, what?  Sorry, I am totally lost.  I have no idea what the problem is
> that we're solving here.
>
> EOD, I am out of ideas.  Feel free to toss this patchset, I have no idea
> what to do.
>

Hi Florian -

After the meeting discussion today, I think we should shelve the pernet 
listeners for now. This series did get us a lot closer to "handle MP_JOINs 
everywhere" behavior, but the corner cases seemed to be pulling us in to 
more TCP changes.

More details: 
https://lore.kernel.org/mptcp/48686ee-4d79-c9fd-35d5-593b9ec9742b@linux.intel.com/


--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-10  0:37                 ` Mat Martineau
@ 2022-03-10  1:27                   ` Kishen Maloor
  0 siblings, 0 replies; 24+ messages in thread
From: Kishen Maloor @ 2022-03-10  1:27 UTC (permalink / raw)
  To: Mat Martineau; +Cc: Florian Westphal, mptcp

On 3/9/22 4:37 PM, Mat Martineau wrote:
> On Wed, 9 Mar 2022, Kishen Maloor wrote:
> 
>> On 3/9/22 1:37 PM, Florian Westphal wrote:
>>> Kishen Maloor <kishen.maloor@intel.com> wrote:
>>>> On 3/9/22 4:53 AM, Florian Westphal wrote:
>>>>> Kishen Maloor <kishen.maloor@intel.com> wrote:
>>>>>>>> Over a newly established MPTCP connection following listen(s1), the PM can issue an
>>>>>>>> ADD_ADDR with B. In light of this change there would be no listener created for B.
>>>>>>>> But if the remote endpoint immediately established a subflow in response (to the
>>>>>>>> ADD_ADDR), then that would create a subflow (connection) socket at B.
>>>>>>>> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).
>>>>>>>
>>>>>>> Why would that fail? You can bind x:y even if there is an established
>>>>>>> connection from x:y to q:r.
>>>>>>
>>>>>> If I establish an MPTCP connection using mptcp_connect individually as
>>>>>> Client and Server, then I am unable to bind a 3rd (new) Server process at the Client's
>>>>>> addr+port [1]. Why is this the case?
>>>>>
>>>>> Whats [1]?
>>>>> I suspect this patch series needs following addition in patch 3:
>>>>>
>>>>> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
>>>>> --- a/net/mptcp/ctrl.c
>>>>> +++ b/net/mptcp/ctrl.c
>>>>> @@ -337,6 +337,8 @@ static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_joi
>>>>>      if (!tb)
>>>>>          return -ENOMEM;
>>>>>
>>>>> +    ssock->sk->sk_reuse = 1;
>>>>> +    ssock->sk->sk_reuseport = 1;
>>>>>      inet_csk(ssock->sk)->icsk_bind_hash = tb;
>>>>>      return 0;
>>>>>  }
>>>>>
>>>>> After that, follwing sequence should work:
>>>>>
>>>>> 1. bind(0.0.0.0, p1) // listen, accept etc, initial subflow established
>>>>> 2. announce p2
>>>>> 3. receive join on addr, p2
>>>>> 4. bind(0.0.0.0, p2)
>>>>>
>>>>> 4) should work because sk used for endpoint in 3) has reuse flag set
>>>>> and is not in listen state.
>>>>>
>>>>> cf. include/net/inet_hashtables.h, line 47:
>>>>> 2) If all sockets have sk->sk_reuse set, and none of them
>>>>>    TCP_LISTEN state, the port may be shared.
>>>>>
>>>>
>>>> Wouldn't 4) fail if the socket being bound at the time does not have the SO_REUSExxx flag(s) set?
>>>
>>> Yes, it needs SO_REUSEADDR set.
>>>
>>>> If so, that would be application level thing and in that situation we don't have a way to
>>>> avoid a race.  Whereas when we require an explicit listener, we could have the kernel take a step
>>>> back (and not create a listener) to break the race.
>>>
>>> Uh, what?  Sorry, I am totally lost.  I have no idea what the problem is
>>> that we're solving here.
>>>
>>> EOD, I am out of ideas.  Feel free to toss this patchset, I have no idea
>>> what to do.
>>
>> Sorry, what isn't clear?
>>
>> This series was meant to address perceived pitfalls of kernel listeners, such as
>> race conditions which were discussed. So, I believe we are trying to assess if
>> these changes indeed do that, or if anything further needs to be done.
>>
> 
> Responding to both Kishen and Florian:
> 
> I think Kishen's summary here is accurate - the way the current PM code uses listening sockets, and the expanded use of listening sockets in the userspace PM code, led to some hard-to-troubleshoot situations where in-kernel listeners interfered with applications expecting to bind() the same addresses / ports.
> 
> And I think what Kishen found while testing the userspace PM patches is that the "pernet listen socket" approach still has some bind() failure corner cases that application programs (and programmers) might find surprising, and that the kernel can't solve these cases. In comparison, with the in-kernel (explicit) listener sockets, the PM code can see some of those failures and work around them to some degree.
> 
> 
>> It seems we've thus far identified one change as a result of this conversation to avoid a
>> race, i.e. setting the _reuse_ flags inside mptcp_init_join_sk(). Is there anything else to do, or
>> are we now confident in these changes?
> 
> Kishen, to be sure I'm parsing the above question correctly, "these changes" == the "replace per-addr listener sockets" series this email thread is part of?
> 

That's correct. I was referring to this "replace per-addr listener sockets" series.

> If so, yes I think the reuse flags are part of moving forward with this, in addition to what you mention below.
> 
>>
>> I think the group needs to make a call on our path forward (one way or the other) so that
>> we can also progress on other related work.
>>
>> On a separate note, I brought together my entire userspace PM series and this on a
>> tree to test and found a couple of glitches in this patchset. A few userspace
>> PM subflows tests would fail due to that. I can respond later on this thread with the
>> specific modifications after I clean things up on my end.
> 
> We will talk about it again at the 10-March meeting, but it sounds like you have some modifications to propose that might be important to consider in a final decision. Do the fixes you have to this patchset involve the bind() issues above or are they totally separate?
> 

My changes are separate and fix a couple of logical errors in this patchset to make things work.

The bind() related issues discussed in this thread however was just me analyzing for situations
that could lead to races with the application, and was thinking along the lines of Paolo's prior
assessments.

> 
> -- 
> Mat Martineau
> Intel


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-09 23:40               ` Kishen Maloor
@ 2022-03-10  0:37                 ` Mat Martineau
  2022-03-10  1:27                   ` Kishen Maloor
  0 siblings, 1 reply; 24+ messages in thread
From: Mat Martineau @ 2022-03-10  0:37 UTC (permalink / raw)
  To: Kishen Maloor; +Cc: Florian Westphal, mptcp

On Wed, 9 Mar 2022, Kishen Maloor wrote:

> On 3/9/22 1:37 PM, Florian Westphal wrote:
>> Kishen Maloor <kishen.maloor@intel.com> wrote:
>>> On 3/9/22 4:53 AM, Florian Westphal wrote:
>>>> Kishen Maloor <kishen.maloor@intel.com> wrote:
>>>>>>> Over a newly established MPTCP connection following listen(s1), the PM can issue an
>>>>>>> ADD_ADDR with B. In light of this change there would be no listener created for B.
>>>>>>> But if the remote endpoint immediately established a subflow in response (to the
>>>>>>> ADD_ADDR), then that would create a subflow (connection) socket at B.
>>>>>>> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).
>>>>>>
>>>>>> Why would that fail? You can bind x:y even if there is an established
>>>>>> connection from x:y to q:r.
>>>>>
>>>>> If I establish an MPTCP connection using mptcp_connect individually as
>>>>> Client and Server, then I am unable to bind a 3rd (new) Server process at the Client's
>>>>> addr+port [1]. Why is this the case?
>>>>
>>>> Whats [1]?
>>>> I suspect this patch series needs following addition in patch 3:
>>>>
>>>> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
>>>> --- a/net/mptcp/ctrl.c
>>>> +++ b/net/mptcp/ctrl.c
>>>> @@ -337,6 +337,8 @@ static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_joi
>>>>  	if (!tb)
>>>>  		return -ENOMEM;
>>>>
>>>> +	ssock->sk->sk_reuse = 1;
>>>> +	ssock->sk->sk_reuseport = 1;
>>>>  	inet_csk(ssock->sk)->icsk_bind_hash = tb;
>>>>  	return 0;
>>>>  }
>>>>
>>>> After that, follwing sequence should work:
>>>>
>>>> 1. bind(0.0.0.0, p1) // listen, accept etc, initial subflow established
>>>> 2. announce p2
>>>> 3. receive join on addr, p2
>>>> 4. bind(0.0.0.0, p2)
>>>>
>>>> 4) should work because sk used for endpoint in 3) has reuse flag set
>>>> and is not in listen state.
>>>>
>>>> cf. include/net/inet_hashtables.h, line 47:
>>>> 2) If all sockets have sk->sk_reuse set, and none of them
>>>>    TCP_LISTEN state, the port may be shared.
>>>>
>>>
>>> Wouldn't 4) fail if the socket being bound at the time does not have the SO_REUSExxx flag(s) set?
>>
>> Yes, it needs SO_REUSEADDR set.
>>
>>> If so, that would be application level thing and in that situation we don't have a way to
>>> avoid a race.  Whereas when we require an explicit listener, we could have the kernel take a step
>>> back (and not create a listener) to break the race.
>>
>> Uh, what?  Sorry, I am totally lost.  I have no idea what the problem is
>> that we're solving here.
>>
>> EOD, I am out of ideas.  Feel free to toss this patchset, I have no idea
>> what to do.
>
> Sorry, what isn't clear?
>
> This series was meant to address perceived pitfalls of kernel listeners, such as
> race conditions which were discussed. So, I believe we are trying to assess if
> these changes indeed do that, or if anything further needs to be done.
>

Responding to both Kishen and Florian:

I think Kishen's summary here is accurate - the way the current PM code 
uses listening sockets, and the expanded use of listening sockets in the 
userspace PM code, led to some hard-to-troubleshoot situations where 
in-kernel listeners interfered with applications expecting to bind() the 
same addresses / ports.

And I think what Kishen found while testing the userspace PM patches is 
that the "pernet listen socket" approach still has some bind() failure 
corner cases that application programs (and programmers) might find 
surprising, and that the kernel can't solve these cases. In comparison, 
with the in-kernel (explicit) listener sockets, the PM code can see some 
of those failures and work around them to some degree.


> It seems we've thus far identified one change as a result of this conversation to avoid a
> race, i.e. setting the _reuse_ flags inside mptcp_init_join_sk(). Is there anything else to do, or
> are we now confident in these changes?

Kishen, to be sure I'm parsing the above question correctly, "these 
changes" == the "replace per-addr listener sockets" series this email 
thread is part of?

If so, yes I think the reuse flags are part of moving forward with this, 
in addition to what you mention below.

>
> I think the group needs to make a call on our path forward (one way or the other) so that
> we can also progress on other related work.
>
> On a separate note, I brought together my entire userspace PM series and this on a
> tree to test and found a couple of glitches in this patchset. A few userspace
> PM subflows tests would fail due to that. I can respond later on this thread with the
> specific modifications after I clean things up on my end.

We will talk about it again at the 10-March meeting, but it sounds like 
you have some modifications to propose that might be important to consider 
in a final decision. Do the fixes you have to this patchset involve the 
bind() issues above or are they totally separate?


--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-09 21:37             ` Florian Westphal
@ 2022-03-09 23:40               ` Kishen Maloor
  2022-03-10  0:37                 ` Mat Martineau
  2022-03-11  1:16               ` Mat Martineau
  1 sibling, 1 reply; 24+ messages in thread
From: Kishen Maloor @ 2022-03-09 23:40 UTC (permalink / raw)
  To: Florian Westphal; +Cc: mptcp

On 3/9/22 1:37 PM, Florian Westphal wrote:
> Kishen Maloor <kishen.maloor@intel.com> wrote:
>> On 3/9/22 4:53 AM, Florian Westphal wrote:
>>> Kishen Maloor <kishen.maloor@intel.com> wrote:
>>>>>> Over a newly established MPTCP connection following listen(s1), the PM can issue an 
>>>>>> ADD_ADDR with B. In light of this change there would be no listener created for B. 
>>>>>> But if the remote endpoint immediately established a subflow in response (to the 
>>>>>> ADD_ADDR), then that would create a subflow (connection) socket at B.
>>>>>> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).
>>>>>
>>>>> Why would that fail? You can bind x:y even if there is an established
>>>>> connection from x:y to q:r.
>>>>
>>>> If I establish an MPTCP connection using mptcp_connect individually as 
>>>> Client and Server, then I am unable to bind a 3rd (new) Server process at the Client's 
>>>> addr+port [1]. Why is this the case?
>>>
>>> Whats [1]?
>>> I suspect this patch series needs following addition in patch 3:
>>>
>>> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
>>> --- a/net/mptcp/ctrl.c
>>> +++ b/net/mptcp/ctrl.c
>>> @@ -337,6 +337,8 @@ static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_joi
>>>  	if (!tb)
>>>  		return -ENOMEM;
>>>  
>>> +	ssock->sk->sk_reuse = 1;
>>> +	ssock->sk->sk_reuseport = 1;
>>>  	inet_csk(ssock->sk)->icsk_bind_hash = tb;
>>>  	return 0;
>>>  }
>>>
>>> After that, follwing sequence should work:
>>>
>>> 1. bind(0.0.0.0, p1) // listen, accept etc, initial subflow established
>>> 2. announce p2
>>> 3. receive join on addr, p2
>>> 4. bind(0.0.0.0, p2)
>>>
>>> 4) should work because sk used for endpoint in 3) has reuse flag set
>>> and is not in listen state.
>>>
>>> cf. include/net/inet_hashtables.h, line 47:
>>> 2) If all sockets have sk->sk_reuse set, and none of them
>>>    TCP_LISTEN state, the port may be shared.
>>>
>>
>> Wouldn't 4) fail if the socket being bound at the time does not have the SO_REUSExxx flag(s) set?
> 
> Yes, it needs SO_REUSEADDR set.
> 
>> If so, that would be application level thing and in that situation we don't have a way to
>> avoid a race.  Whereas when we require an explicit listener, we could have the kernel take a step
>> back (and not create a listener) to break the race.
> 
> Uh, what?  Sorry, I am totally lost.  I have no idea what the problem is
> that we're solving here.
> 
> EOD, I am out of ideas.  Feel free to toss this patchset, I have no idea
> what to do.

Sorry, what isn't clear? 

This series was meant to address perceived pitfalls of kernel listeners, such as
race conditions which were discussed. So, I believe we are trying to assess if
these changes indeed do that, or if anything further needs to be done. 

It seems we've thus far identified one change as a result of this conversation to avoid a
race, i.e. setting the _reuse_ flags inside mptcp_init_join_sk(). Is there anything else to do, or
are we now confident in these changes?

I think the group needs to make a call on our path forward (one way or the other) so that 
we can also progress on other related work.

On a separate note, I brought together my entire userspace PM series and this on a 
tree to test and found a couple of glitches in this patchset. A few userspace 
PM subflows tests would fail due to that. I can respond later on this thread with the
specific modifications after I clean things up on my end.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-09 17:40           ` Kishen Maloor
@ 2022-03-09 21:37             ` Florian Westphal
  2022-03-09 23:40               ` Kishen Maloor
  2022-03-11  1:16               ` Mat Martineau
  0 siblings, 2 replies; 24+ messages in thread
From: Florian Westphal @ 2022-03-09 21:37 UTC (permalink / raw)
  To: Kishen Maloor; +Cc: Florian Westphal, mptcp

Kishen Maloor <kishen.maloor@intel.com> wrote:
> On 3/9/22 4:53 AM, Florian Westphal wrote:
> > Kishen Maloor <kishen.maloor@intel.com> wrote:
> >>>> Over a newly established MPTCP connection following listen(s1), the PM can issue an 
> >>>> ADD_ADDR with B. In light of this change there would be no listener created for B. 
> >>>> But if the remote endpoint immediately established a subflow in response (to the 
> >>>> ADD_ADDR), then that would create a subflow (connection) socket at B.
> >>>> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).
> >>>
> >>> Why would that fail? You can bind x:y even if there is an established
> >>> connection from x:y to q:r.
> >>
> >> If I establish an MPTCP connection using mptcp_connect individually as 
> >> Client and Server, then I am unable to bind a 3rd (new) Server process at the Client's 
> >> addr+port [1]. Why is this the case?
> > 
> > Whats [1]?
> > I suspect this patch series needs following addition in patch 3:
> > 
> > diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
> > --- a/net/mptcp/ctrl.c
> > +++ b/net/mptcp/ctrl.c
> > @@ -337,6 +337,8 @@ static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_joi
> >  	if (!tb)
> >  		return -ENOMEM;
> >  
> > +	ssock->sk->sk_reuse = 1;
> > +	ssock->sk->sk_reuseport = 1;
> >  	inet_csk(ssock->sk)->icsk_bind_hash = tb;
> >  	return 0;
> >  }
> > 
> > After that, follwing sequence should work:
> > 
> > 1. bind(0.0.0.0, p1) // listen, accept etc, initial subflow established
> > 2. announce p2
> > 3. receive join on addr, p2
> > 4. bind(0.0.0.0, p2)
> > 
> > 4) should work because sk used for endpoint in 3) has reuse flag set
> > and is not in listen state.
> > 
> > cf. include/net/inet_hashtables.h, line 47:
> > 2) If all sockets have sk->sk_reuse set, and none of them
> >    TCP_LISTEN state, the port may be shared.
> > 
> 
> Wouldn't 4) fail if the socket being bound at the time does not have the SO_REUSExxx flag(s) set?

Yes, it needs SO_REUSEADDR set.

> If so, that would be application level thing and in that situation we don't have a way to
> avoid a race.  Whereas when we require an explicit listener, we could have the kernel take a step
> back (and not create a listener) to break the race.

Uh, what?  Sorry, I am totally lost.  I have no idea what the problem is
that we're solving here.

EOD, I am out of ideas.  Feel free to toss this patchset, I have no idea
what to do.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-09 12:53         ` Florian Westphal
@ 2022-03-09 17:40           ` Kishen Maloor
  2022-03-09 21:37             ` Florian Westphal
  0 siblings, 1 reply; 24+ messages in thread
From: Kishen Maloor @ 2022-03-09 17:40 UTC (permalink / raw)
  To: Florian Westphal; +Cc: mptcp

On 3/9/22 4:53 AM, Florian Westphal wrote:
> Kishen Maloor <kishen.maloor@intel.com> wrote:
>>>> Over a newly established MPTCP connection following listen(s1), the PM can issue an 
>>>> ADD_ADDR with B. In light of this change there would be no listener created for B. 
>>>> But if the remote endpoint immediately established a subflow in response (to the 
>>>> ADD_ADDR), then that would create a subflow (connection) socket at B.
>>>> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).
>>>
>>> Why would that fail? You can bind x:y even if there is an established
>>> connection from x:y to q:r.
>>
>> If I establish an MPTCP connection using mptcp_connect individually as 
>> Client and Server, then I am unable to bind a 3rd (new) Server process at the Client's 
>> addr+port [1]. Why is this the case?
> 
> Whats [1]?
> I suspect this patch series needs following addition in patch 3:
> 
> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
> --- a/net/mptcp/ctrl.c
> +++ b/net/mptcp/ctrl.c
> @@ -337,6 +337,8 @@ static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_joi
>  	if (!tb)
>  		return -ENOMEM;
>  
> +	ssock->sk->sk_reuse = 1;
> +	ssock->sk->sk_reuseport = 1;
>  	inet_csk(ssock->sk)->icsk_bind_hash = tb;
>  	return 0;
>  }
> 
> After that, follwing sequence should work:
> 
> 1. bind(0.0.0.0, p1) // listen, accept etc, initial subflow established
> 2. announce p2
> 3. receive join on addr, p2
> 4. bind(0.0.0.0, p2)
> 
> 4) should work because sk used for endpoint in 3) has reuse flag set
> and is not in listen state.
> 
> cf. include/net/inet_hashtables.h, line 47:
> 2) If all sockets have sk->sk_reuse set, and none of them
>    TCP_LISTEN state, the port may be shared.
> 

Wouldn't 4) fail if the socket being bound at the time does not have the SO_REUSExxx flag(s) set?

If so, that would be application level thing and in that situation we don't have a way to
avoid a race. Whereas when we require an explicit listener, we could have the kernel take a step
back (and not create a listener) to break the race.

(By the way, [1] was just an annotation so I could refer back to it in my statement
below :))

>> However, I am viewing this matter in light of your changes wherein a connection 
>> socket is established without there having been a bound listener at that 
>> addr+port. In that case, I was wondering if the above situation [1] would apply on a
>> subsequent bind(). If it does, then we could encounter a race such as I described. 
> 
> If so, we should consider dropping support for address announcements
> with ports, it seems too fragile.

Well, this isn't about port-based endpoints and I have thus far assumed in 
this conversation that we're just reusing the subflow port. So, A and B as taken from Paolo's 
original race condition scenario in my mind refer to addrA:portX and addrB:portX.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-08 23:00       ` Kishen Maloor
@ 2022-03-09 12:53         ` Florian Westphal
  2022-03-09 17:40           ` Kishen Maloor
  0 siblings, 1 reply; 24+ messages in thread
From: Florian Westphal @ 2022-03-09 12:53 UTC (permalink / raw)
  To: Kishen Maloor; +Cc: Florian Westphal, mptcp

Kishen Maloor <kishen.maloor@intel.com> wrote:
> >> Over a newly established MPTCP connection following listen(s1), the PM can issue an 
> >> ADD_ADDR with B. In light of this change there would be no listener created for B. 
> >> But if the remote endpoint immediately established a subflow in response (to the 
> >> ADD_ADDR), then that would create a subflow (connection) socket at B.
> >> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).
> > 
> > Why would that fail? You can bind x:y even if there is an established
> > connection from x:y to q:r.
> 
> If I establish an MPTCP connection using mptcp_connect individually as 
> Client and Server, then I am unable to bind a 3rd (new) Server process at the Client's 
> addr+port [1]. Why is this the case?

Whats [1]?
I suspect this patch series needs following addition in patch 3:

diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -337,6 +337,8 @@ static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_joi
 	if (!tb)
 		return -ENOMEM;
 
+	ssock->sk->sk_reuse = 1;
+	ssock->sk->sk_reuseport = 1;
 	inet_csk(ssock->sk)->icsk_bind_hash = tb;
 	return 0;
 }

After that, follwing sequence should work:

1. bind(0.0.0.0, p1) // listen, accept etc, initial subflow established
2. announce p2
3. receive join on addr, p2
4. bind(0.0.0.0, p2)

4) should work because sk used for endpoint in 3) has reuse flag set
and is not in listen state.

cf. include/net/inet_hashtables.h, line 47:
2) If all sockets have sk->sk_reuse set, and none of them
   TCP_LISTEN state, the port may be shared.

> However, I am viewing this matter in light of your changes wherein a connection 
> socket is established without there having been a bound listener at that 
> addr+port. In that case, I was wondering if the above situation [1] would apply on a
> subsequent bind(). If it does, then we could encounter a race such as I described. 

If so, we should consider dropping support for address announcements
with ports, it seems too fragile.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-08 18:45     ` Florian Westphal
@ 2022-03-08 23:00       ` Kishen Maloor
  2022-03-09 12:53         ` Florian Westphal
  0 siblings, 1 reply; 24+ messages in thread
From: Kishen Maloor @ 2022-03-08 23:00 UTC (permalink / raw)
  To: Florian Westphal; +Cc: mptcp

On 3/8/22 10:45 AM, Florian Westphal wrote:
> Kishen Maloor <kishen.maloor@intel.com> wrote:
>> Hi Florian, Paolo,
>>
>> I had responded as below to v2 of this series but wondered if perhaps it got lost
>> in the barrage of emails. So thought I'd resend it as I still have questions
>> regarding our rationale here.
>>
>> On 2/24/22 7:50 AM, Florian Westphal wrote:
>>> Currently mptcp adds kernel-based listener socket for all
>>> netlink-configured mptcp address endpoints.
>>>
>>> This has caveats because kernel may interfere with unrelated programs
>>> that use same address/port pairs.
>>>
>>
>> I assume that this refers to a potential race between a kernel listener and
>> the application which Paolo had raised. But I'm not sure if these changes
>> eliminate that possibility. Pasting Paolo's example below from the prior discussion:
>>
>> """
>> s1 = socket()
>> bind(s1, A)
>> listen(s1)
>> // at this point incoming MPTCP connection can be established on s1
>> // and ADD_ADDR sub-options could be sent back
>>
>> s2 = socket()
>> bind(s2, B)
>> listen(s2)
>> """
>>
>> Over a newly established MPTCP connection following listen(s1), the PM can issue an 
>> ADD_ADDR with B. In light of this change there would be no listener created for B. 
>> But if the remote endpoint immediately established a subflow in response (to the 
>> ADD_ADDR), then that would create a subflow (connection) socket at B.
>> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).
> 
> Why would that fail? You can bind x:y even if there is an established
> connection from x:y to q:r.

If I establish an MPTCP connection using mptcp_connect individually as 
Client and Server, then I am unable to bind a 3rd (new) Server process at the Client's 
addr+port [1]. Why is this the case?

> 
>> In other words, a subflow creation at an address could race with a subsequent bind()
>> at that address causing startup issues in the application.
> 
> I don't think so.  Its fairly common to implement "graceful restart/update" via
> "close(listen); fork(); exit();"-sequence. child continues to handle existing
> connections while new process can start & to serve new clients.

What you're saying may be true for a previously bound addr+port that was in the LISTEN 
state at which a new listener may be bound after the old one was closed, and in the 
presence of ongoing connections. 

However, I am viewing this matter in light of your changes wherein a connection 
socket is established without there having been a bound listener at that 
addr+port. In that case, I was wondering if the above situation [1] would apply on a
subsequent bind(). If it does, then we could encounter a race such as I described. 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-03-04  7:36   ` Kishen Maloor
@ 2022-03-08 18:45     ` Florian Westphal
  2022-03-08 23:00       ` Kishen Maloor
  0 siblings, 1 reply; 24+ messages in thread
From: Florian Westphal @ 2022-03-08 18:45 UTC (permalink / raw)
  To: Kishen Maloor; +Cc: Florian Westphal, mptcp

Kishen Maloor <kishen.maloor@intel.com> wrote:
> Hi Florian, Paolo,
> 
> I had responded as below to v2 of this series but wondered if perhaps it got lost
> in the barrage of emails. So thought I'd resend it as I still have questions
> regarding our rationale here.
> 
> On 2/24/22 7:50 AM, Florian Westphal wrote:
> > Currently mptcp adds kernel-based listener socket for all
> > netlink-configured mptcp address endpoints.
> > 
> > This has caveats because kernel may interfere with unrelated programs
> > that use same address/port pairs.
> > 
> 
> I assume that this refers to a potential race between a kernel listener and
> the application which Paolo had raised. But I'm not sure if these changes
> eliminate that possibility. Pasting Paolo's example below from the prior discussion:
> 
> """
> s1 = socket()
> bind(s1, A)
> listen(s1)
> // at this point incoming MPTCP connection can be established on s1
> // and ADD_ADDR sub-options could be sent back
> 
> s2 = socket()
> bind(s2, B)
> listen(s2)
> """
> 
> Over a newly established MPTCP connection following listen(s1), the PM can issue an 
> ADD_ADDR with B. In light of this change there would be no listener created for B. 
> But if the remote endpoint immediately established a subflow in response (to the 
> ADD_ADDR), then that would create a subflow (connection) socket at B.
> It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).

Why would that fail? You can bind x:y even if there is an established
connection from x:y to q:r.

> In other words, a subflow creation at an address could race with a subsequent bind()
> at that address causing startup issues in the application.

I don't think so.  Its fairly common to implement "graceful restart/update" via
"close(listen); fork(); exit();"-sequence. child continues to handle existing
connections while new process can start & to serve new clients.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-24 15:50 ` [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket Florian Westphal
@ 2022-03-04  7:36   ` Kishen Maloor
  2022-03-08 18:45     ` Florian Westphal
  0 siblings, 1 reply; 24+ messages in thread
From: Kishen Maloor @ 2022-03-04  7:36 UTC (permalink / raw)
  To: Florian Westphal, mptcp

Hi Florian, Paolo,

I had responded as below to v2 of this series but wondered if perhaps it got lost
in the barrage of emails. So thought I'd resend it as I still have questions
regarding our rationale here.

On 2/24/22 7:50 AM, Florian Westphal wrote:
> Currently mptcp adds kernel-based listener socket for all
> netlink-configured mptcp address endpoints.
> 
> This has caveats because kernel may interfere with unrelated programs
> that use same address/port pairs.
> 

I assume that this refers to a potential race between a kernel listener and
the application which Paolo had raised. But I'm not sure if these changes
eliminate that possibility. Pasting Paolo's example below from the prior discussion:

"""
s1 = socket()
bind(s1, A)
listen(s1)
// at this point incoming MPTCP connection can be established on s1
// and ADD_ADDR sub-options could be sent back

s2 = socket()
bind(s2, B)
listen(s2)
"""

Over a newly established MPTCP connection following listen(s1), the PM can issue an 
ADD_ADDR with B. In light of this change there would be no listener created for B. 
But if the remote endpoint immediately established a subflow in response (to the 
ADD_ADDR), then that would create a subflow (connection) socket at B.
It appears (and correct me if I'm wrong) that bind(s2, B) would fail after this point (?).

In other words, a subflow creation at an address could race with a subsequent bind()
at that address causing startup issues in the application.
The only difference now from our prior discussion is that previously the announcement (and 
the act of creating a listener) could race with the bind().

If this assessment is correct, then these changes aren't really sidestepping the 
above race which motivated this alternate approach in the first place. 

Hence, I had the following questions:

What are the benefit(s) of this alternate approach?

If the above race is still a worry, then isn't it a problem that we have no way to
prevent it with this approach? 
Whereas with kernel listeners we could choose to not create a listener with the
netlink API (and thus avoid a race), but in this approach the behavior is baked
into the code and outcomes could vary at runtime.

I do like the fact that we don't need code to manage kernel listeners with this approach
even if it does not resolve the above race, but it comes with other caveats as we've 
been discussing like potential clashes with TCP listeners and TCP code changes that would 
have to be accepted upstream. 

So given the tradeoffs, I am considering the relative merits
of the kernel listener approach: deterministic, documentable behavior and wholly 
contained in the MPTCP layer.

I wanted to take a step back to make sure we're all on the same page as to 
why we're considering these changes. Hope it makes sense :)

> RFC 8684 says:
>  Demultiplexing subflow SYNs MUST be done using the token; this is
>  unlike traditional TCP, where the destination port is used for
>  demultiplexing SYN packets.  Once a subflow is set up, demultiplexing
>  packets is done using the 5-tuple, as in traditional TCP.
> 
> This patch deviates from this in that it retains the existing checks of
> verifying the incoming requests destination vs. the list of announced
> addresses.  If the request is to an address that was not assigned, its
> treated like an invalid token, i.e. we send a tcp reset with mptcp
> error specific code is returned.
> 
> The checks that do this are moved from subflow specific code to the new
> hook, this allows us to perform the check at an earlier stage.
> 
> Furthermore, TCP-only listeners take precedence: An MPTCP peer MUST NOT
> announce addr:port pairs that are already in use by a non-mptcp listener.
> 
> This could be changed, but it requires move of mptcp_handle_join() hook
> *before* the tcp port demux, i.e. an additional conditional in hotpath.
> 
> As-is, the additional conditional (syn && !rst && ...) is placed in the
> 'no socket found' path.
> 
> The pernet "listening" socket is hidden from userspace, its not part of
> any hashes and not bound to any address/port.
> 
> TPROXY-like semantics apply: If tcp demux cannot find a port for a given
> packet, check if the packet is a syn packet with a valid join token.
> 
> If so, the pernet listener is returned and tcp processing resumes.
> Otherwise, handling is identical.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  include/net/mptcp.h  |  19 +++-
>  net/ipv6/tcp_ipv6.c  |  19 ++--
>  net/mptcp/ctrl.c     | 229 ++++++++++++++++++++++++++++++++++++++++++-
>  net/mptcp/protocol.c |   2 +-
>  net/mptcp/protocol.h |   2 +-
>  net/mptcp/subflow.c  |   8 +-
>  6 files changed, 258 insertions(+), 21 deletions(-)
> 
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index b914e63afc13..b8939d7ea12e 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -189,6 +189,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
>  				  struct sk_buff *skb);
>  
>  __be32 mptcp_get_reset_option(const struct sk_buff *skb);
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb);
>  
>  static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
>  {
> @@ -198,10 +199,20 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
>  	return htonl(0u);
>  }
>  
> -static inline struct sock *mptcp_handle_join4(struct sk_buff *skb)
> +static inline struct sock *mptcp_handle_join(struct sk_buff *skb, int af)
>  {
> +	const struct tcphdr *th = tcp_hdr(skb);
> +
> +	if (th->syn && !th->ack && !th->rst && !th->fin)
> +		return __mptcp_handle_join(af, skb);
> +
>  	return NULL;
>  }
> +
> +static inline struct sock *mptcp_handle_join4(struct sk_buff *skb)
> +{
> +	return mptcp_handle_join(skb, AF_INET);
> +}
>  #else
>  
>  static inline void mptcp_init(void)
> @@ -284,14 +295,18 @@ static inline struct sock *mptcp_handle_join4(struct sk_buff *skb) { return NULL
>  
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>  int mptcpv6_init(void);
> +int mptcpv6_init_net(struct net *net);
> +void mptcpv6_exit_net(struct net *net);
>  void mptcpv6_handle_mapped(struct sock *sk, bool mapped);
>  
>  static inline struct sock *mptcp_handle_join6(struct sk_buff *skb)
>  {
> -	return NULL;
> +	return mptcp_handle_join(skb, AF_INET6);
>  }
>  #elif IS_ENABLED(CONFIG_IPV6)
>  static inline int mptcpv6_init(void) { return 0; }
> +static inline int mptcpv6_init_net(struct net *net) { return 0; }
> +static inline void mptcpv6_exit_net(struct net *net) { }
>  static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
>  static inline struct sock *mptcp_handle_join6(struct sk_buff *skb) { return NULL; }
>  #endif
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 2f7a621aa24d..b414e2f77fa3 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -2256,13 +2256,22 @@ static struct inet_protosw tcpv6_protosw = {
>  
>  static int __net_init tcpv6_net_init(struct net *net)
>  {
> -	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
> -				    SOCK_RAW, IPPROTO_TCP, net);
> +	int err = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
> +				       SOCK_RAW, IPPROTO_TCP, net);
> +	if (err)
> +		return err;
> +
> +	err = mptcpv6_init_net(net);
> +	if (err)
> +		inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> +
> +	return err;
>  }
>  
>  static void __net_exit tcpv6_net_exit(struct net *net)
>  {
>  	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> +	mptcpv6_exit_net(net);
>  }
>  
>  static struct pernet_operations tcpv6_net_ops = {
> @@ -2287,15 +2296,9 @@ int __init tcpv6_init(void)
>  	if (ret)
>  		goto out_tcpv6_protosw;
>  
> -	ret = mptcpv6_init();
> -	if (ret)
> -		goto out_tcpv6_pernet_subsys;
> -
>  out:
>  	return ret;
>  
> -out_tcpv6_pernet_subsys:
> -	unregister_pernet_subsys(&tcpv6_net_ops);
>  out_tcpv6_protosw:
>  	inet6_unregister_protosw(&tcpv6_protosw);
>  out_tcpv6_protocol:
> diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
> index ae20b7d92e28..c7370c5147df 100644
> --- a/net/mptcp/ctrl.c
> +++ b/net/mptcp/ctrl.c
> @@ -12,6 +12,7 @@
>  #include <net/netns/generic.h>
>  
>  #include "protocol.h"
> +#include "mib.h"
>  
>  #define MPTCP_SYSCTL_PATH "net/mptcp"
>  
> @@ -21,6 +22,12 @@ static int mptcp_pernet_id;
>  static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
>  #endif
>  
> +struct mptcp_join_sk {
> +	struct sock *sk;
> +	struct inet_bind_bucket *tb;
> +	struct inet_bind_hashbucket head;
> +};
> +
>  struct mptcp_pernet {
>  #ifdef CONFIG_SYSCTL
>  	struct ctl_table_header *ctl_table_hdr;
> @@ -32,6 +39,18 @@ struct mptcp_pernet {
>  	u8 checksum_enabled;
>  	u8 allow_join_initial_addr_port;
>  	u8 pm_type;
> +
> +	/* pernet listener to handle mptcp join requests
> +	 * based on the mptcp token.
> +	 *
> +	 * Has to be pernet because tcp uses
> +	 * sock_net(sk_listener) to obtain the net namespace for
> +	 * the syn/ack route lookup.
> +	 */
> +	struct mptcp_join_sk join4;
> +#if IS_ENABLED(CONFIG_MPTCP_IPV6)
> +	struct mptcp_join_sk join6;
> +#endif
>  };
>  
>  static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
> @@ -185,13 +204,190 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
>  
>  #endif /* CONFIG_SYSCTL */
>  
> +static void add_mptcp_rst(struct sk_buff *skb)
> +{
> +	struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
> +
> +	if (ext) {
> +		memset(ext, 0, sizeof(*ext));
> +		ext->reset_reason = MPTCP_RST_EMPTCP;
> +	}
> +}
> +
> +struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
> +{
> +	struct mptcp_options_received mp_opt;
> +	struct mptcp_pernet *pernet;
> +	struct mptcp_sock *msk;
> +	struct socket *ssock;
> +	struct sock *lsk;
> +	struct net *net;
> +
> +	/* paranoia check: don't allow 0 destination port,
> +	 * else __inet_inherit_port will insert the child socket
> +	 * into the phony hash slot of the pernet listener.
> +	 */
> +	if (tcp_hdr(skb)->dest == 0)
> +		return NULL;
> +
> +	mptcp_get_options(skb, &mp_opt);
> +
> +	if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
> +		return NULL;
> +
> +	net = dev_net(skb_dst(skb)->dev);
> +	if (!mptcp_is_enabled(net))
> +		return NULL;
> +
> +	/* RFC8684: If the token is unknown [..], the receiver will send
> +	 * back a reset (RST) signal, analogous to an unknown port in TCP,
> +	 * containing an MP_TCPRST option (Section 3.6) [..]
> +	 */
> +	msk = mptcp_token_get_sock(net, mp_opt.token);
> +	if (!msk) {
> +		add_mptcp_rst(skb);
> +		return NULL;
> +	}
> +
> +	if (!mptcp_pm_sport_in_anno_list(msk, af, skb)) {
> +		sock_put((struct sock *)msk);
> +		MPTCP_INC_STATS(net, MPTCP_MIB_MISMATCHPORTSYNRX);
> +		add_mptcp_rst(skb);
> +		return NULL;
> +	}
> +
> +	sock_put((struct sock *)msk);
> +	pernet = mptcp_get_pernet(net);
> +
> +	switch (af) {
> +	case AF_INET:
> +		lsk = pernet->join4.sk;
> +		break;
> +#if IS_ENABLED(CONFIG_MPTCP_IPV6)
> +	case AF_INET6:
> +		lsk = pernet->join6.sk;
> +		break;
> +#endif
> +	default:
> +		WARN_ON_ONCE(1);
> +		return NULL;
> +	}
> +
> +	ssock = __mptcp_nmpc_socket(mptcp_sk(lsk));
> +	if (WARN_ON(!ssock))
> +		return NULL;
> +
> +	return ssock->sk;
> +}
> +
> +static struct socket *mptcp_create_join_listen_socket(struct net *net, int af)
> +{
> +	struct socket *s, *ssock;
> +	int err;
> +
> +	err = sock_create_kern(net, af, SOCK_STREAM, IPPROTO_MPTCP, &s);
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	ssock = __mptcp_nmpc_socket(mptcp_sk(s->sk));
> +	if (!ssock) {
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	ssock->sk->sk_max_ack_backlog = SOMAXCONN;
> +	inet_sk_state_store(ssock->sk, TCP_LISTEN);
> +
> +	s->sk->sk_max_ack_backlog = SOMAXCONN;
> +	inet_sk_state_store(s->sk, TCP_LISTEN);
> +
> +	s->sk->sk_net_refcnt = 1;
> +	get_net_track(net, &s->sk->ns_tracker, GFP_KERNEL);
> +	sock_inuse_add(net, 1);
> +
> +	return s;
> +out:
> +	sock_release(s);
> +	return ERR_PTR(err);
> +}
> +
> +static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_join_sk *join_sk)
> +{
> +	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(sk));
> +	struct inet_hashinfo *table = ssock->sk->sk_prot->h.hashinfo;
> +	struct inet_bind_bucket *tb;
> +
> +	spin_lock_init(&join_sk->head.lock);
> +	INIT_HLIST_HEAD(&join_sk->head.chain);
> +
> +	/* Our "listen socket" isn't bound to any address or port.
> +	 * Conceptually, SYN packet with mptcp join request are steered to
> +	 * this pernet socket just like TPROXY steals arbitrary connection
> +	 * requests to assign them to listening socket with different
> +	 * address or port.
> +	 *
> +	 * The bind_bucket is needed for sake of __inet_inherit_port(),
> +	 * so it can place the new child socket in the correct
> +	 * bind_bucket slot.
> +	 *
> +	 * A phony head is used to hide this socket from normal sk loookup.
> +	 */
> +	tb = inet_bind_bucket_create(table->bind_bucket_cachep,
> +				     net, &join_sk->head, 0, 0);
> +	if (!tb)
> +		return -ENOMEM;
> +
> +	inet_csk(ssock->sk)->icsk_bind_hash = tb;
> +	return 0;
> +}
> +
>  static int __net_init mptcp_net_init(struct net *net)
>  {
>  	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +	struct socket *sock;
> +	int err;
>  
>  	mptcp_pernet_set_defaults(pernet);
>  
> -	return mptcp_pernet_new_table(net, pernet);
> +	err = mptcp_pernet_new_table(net, pernet);
> +	if (err)
> +		return err;
> +
> +	sock = mptcp_create_join_listen_socket(net, AF_INET);
> +	if (IS_ERR(sock)) {
> +		err = PTR_ERR(sock);
> +		goto out_table;
> +	}
> +
> +	err = mptcp_init_join_sk(net, sock->sk, &pernet->join4);
> +	if (err) {
> +		sock_release(sock);
> +		goto out_table;
> +	}
> +
> +	/* struct sock is still reachable via sock->sk_socket backpointer */
> +	pernet->join4.sk = sock->sk;
> +	return err;
> +
> +out_table:
> +	if (!net_eq(net, &init_net))
> +		mptcp_pernet_del_table(pernet);
> +	return err;
> +}
> +
> +static void __net_exit mptcp_exit_join_sk(struct mptcp_join_sk *jsk)
> +{
> +	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(jsk->sk));
> +	struct inet_bind_bucket *tb;
> +	struct inet_hashinfo *table;
> +
> +	table = ssock->sk->sk_prot->h.hashinfo;
> +
> +	tb = inet_csk(ssock->sk)->icsk_bind_hash;
> +	inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
> +
> +	ssock = jsk->sk->sk_socket;
> +	sock_release(ssock);
>  }
>  
>  /* Note: the callback will only be called per extra netns */
> @@ -200,6 +396,7 @@ static void __net_exit mptcp_net_exit(struct net *net)
>  	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
>  
>  	mptcp_pernet_del_table(pernet);
> +	mptcp_exit_join_sk(&pernet->join4);
>  }
>  
>  static struct pernet_operations mptcp_pernet_ops = {
> @@ -219,12 +416,36 @@ void __init mptcp_init(void)
>  }
>  
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> -int __init mptcpv6_init(void)
> +int __net_init mptcpv6_init_net(struct net *net)
>  {
> +	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +	struct socket *sock;
>  	int err;
>  
> -	err = mptcp_proto_v6_init();
> +	if (net_eq(net, &init_net)) {
> +		err = mptcp_proto_v6_init();
> +		if (err)
> +			return err;
> +	}
> +
> +	sock = mptcp_create_join_listen_socket(net, AF_INET6);
> +	if (IS_ERR(sock))
> +		return PTR_ERR(sock);
>  
> -	return err;
> +	err = mptcp_init_join_sk(net, sock->sk, &pernet->join6);
> +	if (err) {
> +		sock_release(sock);
> +		return err;
> +	}
> +
> +	pernet->join6.sk = sock->sk;
> +	return 0;
> +}
> +
> +void __net_exit mptcpv6_exit_net(struct net *net)
> +{
> +	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
> +
> +	mptcp_exit_join_sk(&pernet->join6);
>  }
>  #endif
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 3cb975227d12..bc7108ed453c 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -3794,7 +3794,7 @@ static struct inet_protosw mptcp_v6_protosw = {
>  	.flags		= INET_PROTOSW_ICSK,
>  };
>  
> -int __init mptcp_proto_v6_init(void)
> +int __net_init mptcp_proto_v6_init(void)
>  {
>  	int err;
>  
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 6b2d7f60c8ad..7ec2513e1c2f 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -648,7 +648,7 @@ static inline bool mptcp_has_another_subflow(struct sock *ssk)
>  
>  void __init mptcp_proto_init(void);
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> -int __init mptcp_proto_v6_init(void);
> +int __net_init mptcp_proto_v6_init(void);
>  #endif
>  
>  struct sock *mptcp_sk_clone(const struct sock *sk,
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 77da5f744a17..67a4c698602d 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -116,6 +116,9 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
>  
>  static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
>  {
> +	if (inet_sk(sk)->inet_sport == 0)
> +		return true;
> +
>  	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
>  }
>  
> @@ -216,11 +219,6 @@ static int subflow_check_req(struct request_sock *req,
>  			pr_debug("syn inet_sport=%d %d",
>  				 ntohs(inet_sk(sk_listener)->inet_sport),
>  				 ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
> -			if (!mptcp_pm_sport_in_anno_list(subflow_req->msk,
> -							 sk_listener->sk_family, skb)) {
> -				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
> -				return -EPERM;
> -			}
>  			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
>  		}
>  


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket
  2022-02-24 15:50 [PATCH mptcp-next v4 0/4] mptcp: replace per-addr listener sockets Florian Westphal
@ 2022-02-24 15:50 ` Florian Westphal
  2022-03-04  7:36   ` Kishen Maloor
  0 siblings, 1 reply; 24+ messages in thread
From: Florian Westphal @ 2022-02-24 15:50 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Currently mptcp adds kernel-based listener socket for all
netlink-configured mptcp address endpoints.

This has caveats because kernel may interfere with unrelated programs
that use same address/port pairs.

RFC 8684 says:
 Demultiplexing subflow SYNs MUST be done using the token; this is
 unlike traditional TCP, where the destination port is used for
 demultiplexing SYN packets.  Once a subflow is set up, demultiplexing
 packets is done using the 5-tuple, as in traditional TCP.

This patch deviates from this in that it retains the existing checks of
verifying the incoming requests destination vs. the list of announced
addresses.  If the request is to an address that was not assigned, its
treated like an invalid token, i.e. we send a tcp reset with mptcp
error specific code is returned.

The checks that do this are moved from subflow specific code to the new
hook, this allows us to perform the check at an earlier stage.

Furthermore, TCP-only listeners take precedence: An MPTCP peer MUST NOT
announce addr:port pairs that are already in use by a non-mptcp listener.

This could be changed, but it requires move of mptcp_handle_join() hook
*before* the tcp port demux, i.e. an additional conditional in hotpath.

As-is, the additional conditional (syn && !rst && ...) is placed in the
'no socket found' path.

The pernet "listening" socket is hidden from userspace, its not part of
any hashes and not bound to any address/port.

TPROXY-like semantics apply: If tcp demux cannot find a port for a given
packet, check if the packet is a syn packet with a valid join token.

If so, the pernet listener is returned and tcp processing resumes.
Otherwise, handling is identical.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/mptcp.h  |  19 +++-
 net/ipv6/tcp_ipv6.c  |  19 ++--
 net/mptcp/ctrl.c     | 229 ++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/protocol.c |   2 +-
 net/mptcp/protocol.h |   2 +-
 net/mptcp/subflow.c  |   8 +-
 6 files changed, 258 insertions(+), 21 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index b914e63afc13..b8939d7ea12e 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -189,6 +189,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
 				  struct sk_buff *skb);
 
 __be32 mptcp_get_reset_option(const struct sk_buff *skb);
+struct sock *__mptcp_handle_join(int af, struct sk_buff *skb);
 
 static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
 {
@@ -198,10 +199,20 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
 	return htonl(0u);
 }
 
-static inline struct sock *mptcp_handle_join4(struct sk_buff *skb)
+static inline struct sock *mptcp_handle_join(struct sk_buff *skb, int af)
 {
+	const struct tcphdr *th = tcp_hdr(skb);
+
+	if (th->syn && !th->ack && !th->rst && !th->fin)
+		return __mptcp_handle_join(af, skb);
+
 	return NULL;
 }
+
+static inline struct sock *mptcp_handle_join4(struct sk_buff *skb)
+{
+	return mptcp_handle_join(skb, AF_INET);
+}
 #else
 
 static inline void mptcp_init(void)
@@ -284,14 +295,18 @@ static inline struct sock *mptcp_handle_join4(struct sk_buff *skb) { return NULL
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 int mptcpv6_init(void);
+int mptcpv6_init_net(struct net *net);
+void mptcpv6_exit_net(struct net *net);
 void mptcpv6_handle_mapped(struct sock *sk, bool mapped);
 
 static inline struct sock *mptcp_handle_join6(struct sk_buff *skb)
 {
-	return NULL;
+	return mptcp_handle_join(skb, AF_INET6);
 }
 #elif IS_ENABLED(CONFIG_IPV6)
 static inline int mptcpv6_init(void) { return 0; }
+static inline int mptcpv6_init_net(struct net *net) { return 0; }
+static inline void mptcpv6_exit_net(struct net *net) { }
 static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
 static inline struct sock *mptcp_handle_join6(struct sk_buff *skb) { return NULL; }
 #endif
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2f7a621aa24d..b414e2f77fa3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2256,13 +2256,22 @@ static struct inet_protosw tcpv6_protosw = {
 
 static int __net_init tcpv6_net_init(struct net *net)
 {
-	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
-				    SOCK_RAW, IPPROTO_TCP, net);
+	int err = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+				       SOCK_RAW, IPPROTO_TCP, net);
+	if (err)
+		return err;
+
+	err = mptcpv6_init_net(net);
+	if (err)
+		inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+
+	return err;
 }
 
 static void __net_exit tcpv6_net_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+	mptcpv6_exit_net(net);
 }
 
 static struct pernet_operations tcpv6_net_ops = {
@@ -2287,15 +2296,9 @@ int __init tcpv6_init(void)
 	if (ret)
 		goto out_tcpv6_protosw;
 
-	ret = mptcpv6_init();
-	if (ret)
-		goto out_tcpv6_pernet_subsys;
-
 out:
 	return ret;
 
-out_tcpv6_pernet_subsys:
-	unregister_pernet_subsys(&tcpv6_net_ops);
 out_tcpv6_protosw:
 	inet6_unregister_protosw(&tcpv6_protosw);
 out_tcpv6_protocol:
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..c7370c5147df 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -12,6 +12,7 @@
 #include <net/netns/generic.h>
 
 #include "protocol.h"
+#include "mib.h"
 
 #define MPTCP_SYSCTL_PATH "net/mptcp"
 
@@ -21,6 +22,12 @@ static int mptcp_pernet_id;
 static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
 #endif
 
+struct mptcp_join_sk {
+	struct sock *sk;
+	struct inet_bind_bucket *tb;
+	struct inet_bind_hashbucket head;
+};
+
 struct mptcp_pernet {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *ctl_table_hdr;
@@ -32,6 +39,18 @@ struct mptcp_pernet {
 	u8 checksum_enabled;
 	u8 allow_join_initial_addr_port;
 	u8 pm_type;
+
+	/* pernet listener to handle mptcp join requests
+	 * based on the mptcp token.
+	 *
+	 * Has to be pernet because tcp uses
+	 * sock_net(sk_listener) to obtain the net namespace for
+	 * the syn/ack route lookup.
+	 */
+	struct mptcp_join_sk join4;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	struct mptcp_join_sk join6;
+#endif
 };
 
 static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -185,13 +204,190 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
 
 #endif /* CONFIG_SYSCTL */
 
+static void add_mptcp_rst(struct sk_buff *skb)
+{
+	struct mptcp_ext *ext = skb_ext_add(skb, SKB_EXT_MPTCP);
+
+	if (ext) {
+		memset(ext, 0, sizeof(*ext));
+		ext->reset_reason = MPTCP_RST_EMPTCP;
+	}
+}
+
+struct sock *__mptcp_handle_join(int af, struct sk_buff *skb)
+{
+	struct mptcp_options_received mp_opt;
+	struct mptcp_pernet *pernet;
+	struct mptcp_sock *msk;
+	struct socket *ssock;
+	struct sock *lsk;
+	struct net *net;
+
+	/* paranoia check: don't allow 0 destination port,
+	 * else __inet_inherit_port will insert the child socket
+	 * into the phony hash slot of the pernet listener.
+	 */
+	if (tcp_hdr(skb)->dest == 0)
+		return NULL;
+
+	mptcp_get_options(skb, &mp_opt);
+
+	if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ))
+		return NULL;
+
+	net = dev_net(skb_dst(skb)->dev);
+	if (!mptcp_is_enabled(net))
+		return NULL;
+
+	/* RFC8684: If the token is unknown [..], the receiver will send
+	 * back a reset (RST) signal, analogous to an unknown port in TCP,
+	 * containing an MP_TCPRST option (Section 3.6) [..]
+	 */
+	msk = mptcp_token_get_sock(net, mp_opt.token);
+	if (!msk) {
+		add_mptcp_rst(skb);
+		return NULL;
+	}
+
+	if (!mptcp_pm_sport_in_anno_list(msk, af, skb)) {
+		sock_put((struct sock *)msk);
+		MPTCP_INC_STATS(net, MPTCP_MIB_MISMATCHPORTSYNRX);
+		add_mptcp_rst(skb);
+		return NULL;
+	}
+
+	sock_put((struct sock *)msk);
+	pernet = mptcp_get_pernet(net);
+
+	switch (af) {
+	case AF_INET:
+		lsk = pernet->join4.sk;
+		break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	case AF_INET6:
+		lsk = pernet->join6.sk;
+		break;
+#endif
+	default:
+		WARN_ON_ONCE(1);
+		return NULL;
+	}
+
+	ssock = __mptcp_nmpc_socket(mptcp_sk(lsk));
+	if (WARN_ON(!ssock))
+		return NULL;
+
+	return ssock->sk;
+}
+
+static struct socket *mptcp_create_join_listen_socket(struct net *net, int af)
+{
+	struct socket *s, *ssock;
+	int err;
+
+	err = sock_create_kern(net, af, SOCK_STREAM, IPPROTO_MPTCP, &s);
+	if (err)
+		return ERR_PTR(err);
+
+	ssock = __mptcp_nmpc_socket(mptcp_sk(s->sk));
+	if (!ssock) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	ssock->sk->sk_max_ack_backlog = SOMAXCONN;
+	inet_sk_state_store(ssock->sk, TCP_LISTEN);
+
+	s->sk->sk_max_ack_backlog = SOMAXCONN;
+	inet_sk_state_store(s->sk, TCP_LISTEN);
+
+	s->sk->sk_net_refcnt = 1;
+	get_net_track(net, &s->sk->ns_tracker, GFP_KERNEL);
+	sock_inuse_add(net, 1);
+
+	return s;
+out:
+	sock_release(s);
+	return ERR_PTR(err);
+}
+
+static int mptcp_init_join_sk(struct net *net, struct sock *sk, struct mptcp_join_sk *join_sk)
+{
+	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(sk));
+	struct inet_hashinfo *table = ssock->sk->sk_prot->h.hashinfo;
+	struct inet_bind_bucket *tb;
+
+	spin_lock_init(&join_sk->head.lock);
+	INIT_HLIST_HEAD(&join_sk->head.chain);
+
+	/* Our "listen socket" isn't bound to any address or port.
+	 * Conceptually, SYN packet with mptcp join request are steered to
+	 * this pernet socket just like TPROXY steals arbitrary connection
+	 * requests to assign them to listening socket with different
+	 * address or port.
+	 *
+	 * The bind_bucket is needed for sake of __inet_inherit_port(),
+	 * so it can place the new child socket in the correct
+	 * bind_bucket slot.
+	 *
+	 * A phony head is used to hide this socket from normal sk loookup.
+	 */
+	tb = inet_bind_bucket_create(table->bind_bucket_cachep,
+				     net, &join_sk->head, 0, 0);
+	if (!tb)
+		return -ENOMEM;
+
+	inet_csk(ssock->sk)->icsk_bind_hash = tb;
+	return 0;
+}
+
 static int __net_init mptcp_net_init(struct net *net)
 {
 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+	struct socket *sock;
+	int err;
 
 	mptcp_pernet_set_defaults(pernet);
 
-	return mptcp_pernet_new_table(net, pernet);
+	err = mptcp_pernet_new_table(net, pernet);
+	if (err)
+		return err;
+
+	sock = mptcp_create_join_listen_socket(net, AF_INET);
+	if (IS_ERR(sock)) {
+		err = PTR_ERR(sock);
+		goto out_table;
+	}
+
+	err = mptcp_init_join_sk(net, sock->sk, &pernet->join4);
+	if (err) {
+		sock_release(sock);
+		goto out_table;
+	}
+
+	/* struct sock is still reachable via sock->sk_socket backpointer */
+	pernet->join4.sk = sock->sk;
+	return err;
+
+out_table:
+	if (!net_eq(net, &init_net))
+		mptcp_pernet_del_table(pernet);
+	return err;
+}
+
+static void __net_exit mptcp_exit_join_sk(struct mptcp_join_sk *jsk)
+{
+	struct socket *ssock = __mptcp_nmpc_socket(mptcp_sk(jsk->sk));
+	struct inet_bind_bucket *tb;
+	struct inet_hashinfo *table;
+
+	table = ssock->sk->sk_prot->h.hashinfo;
+
+	tb = inet_csk(ssock->sk)->icsk_bind_hash;
+	inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
+
+	ssock = jsk->sk->sk_socket;
+	sock_release(ssock);
 }
 
 /* Note: the callback will only be called per extra netns */
@@ -200,6 +396,7 @@ static void __net_exit mptcp_net_exit(struct net *net)
 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
 
 	mptcp_pernet_del_table(pernet);
+	mptcp_exit_join_sk(&pernet->join4);
 }
 
 static struct pernet_operations mptcp_pernet_ops = {
@@ -219,12 +416,36 @@ void __init mptcp_init(void)
 }
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-int __init mptcpv6_init(void)
+int __net_init mptcpv6_init_net(struct net *net)
 {
+	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+	struct socket *sock;
 	int err;
 
-	err = mptcp_proto_v6_init();
+	if (net_eq(net, &init_net)) {
+		err = mptcp_proto_v6_init();
+		if (err)
+			return err;
+	}
+
+	sock = mptcp_create_join_listen_socket(net, AF_INET6);
+	if (IS_ERR(sock))
+		return PTR_ERR(sock);
 
-	return err;
+	err = mptcp_init_join_sk(net, sock->sk, &pernet->join6);
+	if (err) {
+		sock_release(sock);
+		return err;
+	}
+
+	pernet->join6.sk = sock->sk;
+	return 0;
+}
+
+void __net_exit mptcpv6_exit_net(struct net *net)
+{
+	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+
+	mptcp_exit_join_sk(&pernet->join6);
 }
 #endif
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3cb975227d12..bc7108ed453c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3794,7 +3794,7 @@ static struct inet_protosw mptcp_v6_protosw = {
 	.flags		= INET_PROTOSW_ICSK,
 };
 
-int __init mptcp_proto_v6_init(void)
+int __net_init mptcp_proto_v6_init(void)
 {
 	int err;
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6b2d7f60c8ad..7ec2513e1c2f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -648,7 +648,7 @@ static inline bool mptcp_has_another_subflow(struct sock *ssk)
 
 void __init mptcp_proto_init(void);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-int __init mptcp_proto_v6_init(void);
+int __net_init mptcp_proto_v6_init(void);
 #endif
 
 struct sock *mptcp_sk_clone(const struct sock *sk,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 77da5f744a17..67a4c698602d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -116,6 +116,9 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
 
 static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
 {
+	if (inet_sk(sk)->inet_sport == 0)
+		return true;
+
 	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
 }
 
@@ -216,11 +219,6 @@ static int subflow_check_req(struct request_sock *req,
 			pr_debug("syn inet_sport=%d %d",
 				 ntohs(inet_sk(sk_listener)->inet_sport),
 				 ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
-			if (!mptcp_pm_sport_in_anno_list(subflow_req->msk,
-							 sk_listener->sk_family, skb)) {
-				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
-				return -EPERM;
-			}
 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
 		}
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2022-03-11  1:16 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-10 15:29 [PATCH mptcp-next 0/4] mptcp: replace per-addr listener sockets Florian Westphal
2022-02-10 15:29 ` [PATCH mptcp-next 1/4] mptcp: prefer ip address in syn skb instead of listen sk bound address Florian Westphal
2022-02-11 10:34   ` Paolo Abeni
2022-02-10 15:29 ` [PATCH mptcp-next 2/4] tcp: add mptcp join demultiplex hooks Florian Westphal
2022-02-10 15:29 ` [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket Florian Westphal
2022-02-11  2:03   ` Mat Martineau
2022-02-11 11:21     ` Paolo Abeni
2022-02-12  0:08     ` Florian Westphal
2022-02-11 11:03   ` Paolo Abeni
2022-02-12  0:12     ` Florian Westphal
2022-02-11 11:12   ` Matthieu Baerts
2022-02-12  0:13     ` Florian Westphal
2022-02-10 15:29 ` [PATCH mptcp-next 4/4] mptcp: remove per-address listening sockets Florian Westphal
2022-02-24 15:50 [PATCH mptcp-next v4 0/4] mptcp: replace per-addr listener sockets Florian Westphal
2022-02-24 15:50 ` [PATCH mptcp-next 3/4] mptcp: handle join requests via pernet listen socket Florian Westphal
2022-03-04  7:36   ` Kishen Maloor
2022-03-08 18:45     ` Florian Westphal
2022-03-08 23:00       ` Kishen Maloor
2022-03-09 12:53         ` Florian Westphal
2022-03-09 17:40           ` Kishen Maloor
2022-03-09 21:37             ` Florian Westphal
2022-03-09 23:40               ` Kishen Maloor
2022-03-10  0:37                 ` Mat Martineau
2022-03-10  1:27                   ` Kishen Maloor
2022-03-11  1:16               ` Mat Martineau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.