All of lore.kernel.org
 help / color / mirror / Atom feed
* [MPTCP] [RFC PATCH v5 08/17] mptcp: Create SUBFLOW socket for incoming connections
@ 2018-12-14 22:27 Mat Martineau
  0 siblings, 0 replies; only message in thread
From: Mat Martineau @ 2018-12-14 22:27 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 15541 bytes --]

From: Peter Krystad <peter.krystad(a)intel.com>

Add a subflow_request_sock type that extends tcp_request_sock,
and add an is_mptcp flag to tcp_request_sock to distinguish them.

Override the listen(), accept() and getname() methods of the MPTCP
socket proto_ops so they may act on the subflow socket.

Override the conn_request() and syn_recv_sock() handlers
in the inet_connection_sock to handle incoming MPTCP
SYNs and the ACK sent in reply to the SYN-ACK response.

Add handling in tcp_output.c to add MP_CAPABLE to an outgoing
SYN-ACK response for a subflow_request_sock.

Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
---
 include/linux/tcp.h   |   1 +
 include/net/mptcp.h   |  26 ++++++++++
 include/net/tcp.h     |   1 +
 net/ipv4/tcp_input.c  |   1 +
 net/ipv4/tcp_output.c |  21 +++++++-
 net/mptcp/options.c   |  15 ++++++
 net/mptcp/protocol.c  | 102 ++++++++++++++++++++++++++++++++++---
 net/mptcp/subflow.c   | 115 ++++++++++++++++++++++++++++++++++++++++--
 8 files changed, 271 insertions(+), 11 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2622817ecd6b..b54ab3b5546a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -148,6 +148,7 @@ struct tcp_request_sock {
 						  * FastOpen it's the seq#
 						  * after data-in-SYN.
 						  */
+	bool				is_mptcp;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index a5c2baeb688f..ced33f1c529e 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -69,6 +69,23 @@ static inline struct subflow_sock *subflow_sk(const struct sock *sk)
 	return (struct subflow_sock *)sk;
 }
 
+struct subflow_request_sock {
+	struct	tcp_request_sock sk;
+	u8	mp_capable : 1,
+		mp_join : 1,
+		checksum : 1,
+		backup : 1,
+		version : 4;
+	u64	local_key;
+	u64	remote_key;
+};
+
+static inline
+struct subflow_request_sock *subflow_rsk(const struct request_sock *rsk)
+{
+	return (struct subflow_request_sock *)rsk;
+}
+
 #ifdef CONFIG_MPTCP
 
 void mptcp_parse_option(const unsigned char *ptr, int opsize,
@@ -77,6 +94,8 @@ unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
 void mptcp_rcv_synsent(struct sock *sk);
 unsigned int mptcp_established_options(struct sock *sk, u64 *local_key,
 				       u64 *remote_key);
+unsigned int mptcp_synack_options(struct request_sock *req,
+				  u64 *local_key, u64 *remote_key);
 
 void mptcp_finish_connect(struct sock *sk, int mp_capable);
 
@@ -104,6 +123,13 @@ static inline void mptcp_rcv_synsent(struct sock *sk)
 {
 }
 
+static inline unsigned int mptcp_synack_options(struct request_sock *sk,
+						u64 *local_key,
+						u64 *remote_key)
+{
+	return 0;
+}
+
 static inline unsigned int mptcp_established_options(struct sock *sk,
 						     u64 *local_key,
 						     u64 *remote_key)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1a408e2f646e..6122e8e36f01 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -216,6 +216,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_MSS_ALIGNED		4
 #define TCPOLEN_EXP_SMC_BASE_ALIGNED	8
 #define TCPOLEN_MPTCP_MPC_SYN		12
+#define TCPOLEN_MPTCP_MPC_SYNACK	20
 #define TCPOLEN_MPTCP_MPC_ACK		20
 
 /* Flags in tp->nonagle */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a77e88bff0e2..9f0a759cff2a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6453,6 +6453,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	tcp_rsk(req)->af_specific = af_ops;
 	tcp_rsk(req)->ts_off = 0;
+	tcp_rsk(req)->is_mptcp = 0;
 
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = af_ops->mss_clamp;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7ea3ae99c51b..89b339c26f7b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -416,6 +416,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 
 /* MPTCP option subtypes */
 #define OPTION_MPTCP_MPC_SYN	(1 << 0)
+#define OPTION_MPTCP_MPC_SYNACK	(1 << 1)
 #define OPTION_MPTCP_MPC_ACK	(1 << 2)
 
 struct tcp_out_options {
@@ -439,12 +440,15 @@ static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
 		return;
 
 	if ((OPTION_MPTCP_MPC_SYN |
+	     OPTION_MPTCP_MPC_SYNACK |
 	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
 		u8 len;
 		__be64 key;
 
 		if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
 			len = TCPOLEN_MPTCP_MPC_SYN;
+		else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
+			len = TCPOLEN_MPTCP_MPC_SYNACK;
 		else
 			len = TCPOLEN_MPTCP_MPC_ACK;
 
@@ -455,7 +459,8 @@ static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
 		key = cpu_to_be64(opts->sndr_key);
 		memcpy((u8 *) ptr, (u8 *) &key, 8);
 		ptr += 2;
-		if (OPTION_MPTCP_MPC_ACK & opts->suboptions) {
+		if ((OPTION_MPTCP_MPC_SYNACK |
+		     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
 			key = cpu_to_be64(opts->rcvr_key);
 			memcpy((u8 *) ptr, (u8 *) &key, 8);
 			ptr += 2;
@@ -763,6 +768,20 @@ static unsigned int tcp_synack_options(const struct sock *sk,
 			remaining -= need;
 		}
 	}
+	if (tcp_rsk(req)->is_mptcp) {
+		u64 local_key;
+		u64 remote_key;
+		if (mptcp_synack_options(req, &local_key, &remote_key)) {
+			if (remaining >= TCPOLEN_MPTCP_MPC_SYNACK) {
+				opts->options |= OPTION_MPTCP;
+				opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
+				opts->sndr_key = local_key;
+				opts->rcvr_key = remote_key;
+				remaining -= TCPOLEN_MPTCP_MPC_SYNACK;
+			}
+		}
+	}
+
 	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
 
 	return MAX_TCP_OPTION_SPACE - remaining;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b0616f520da0..266a9f7fed0d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -189,3 +189,18 @@ unsigned int mptcp_established_options(struct sock *sk, u64 *local_key,
 	}
 	return 0;
 }
+
+unsigned int mptcp_synack_options(struct request_sock *req, u64 *local_key,
+				  u64 *remote_key)
+{
+	struct subflow_request_sock *subflow_req = subflow_rsk(req);
+
+	pr_debug("subflow_req=%p", subflow_req);
+	if (subflow_req->mp_capable) {
+		*local_key = subflow_req->local_key;
+		*remote_key = subflow_req->remote_key;
+		pr_debug("local_key=%llu", *local_key);
+		pr_debug("remote_key=%llu", *remote_key);
+	}
+	return subflow_req->mp_capable;
+}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1a3412a742ea..9f802f69a528 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -80,6 +80,45 @@ static void mptcp_close(struct sock *sk, long timeout)
 	}
 }
 
+static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
+				 bool kern)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *listener = msk->subflow;
+	struct socket *new_sock;
+	struct socket *mp;
+	struct subflow_sock *subflow;
+
+	pr_debug("msk=%p, listener=%p", msk, listener->sk);
+	*err = kernel_accept(listener, &new_sock, flags);
+	if (*err < 0)
+		return NULL;
+
+	subflow = subflow_sk(new_sock->sk);
+	pr_debug("new_sock=%p", subflow);
+
+	*err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_MPTCP, &mp);
+	if (*err < 0) {
+		kernel_sock_shutdown(new_sock, SHUT_RDWR);
+		sock_release(new_sock);
+		return NULL;
+	}
+
+	msk = mptcp_sk(mp->sk);
+	pr_debug("msk=%p", msk);
+	subflow->conn = mp->sk;
+
+	if (subflow->mp_capable) {
+		msk->remote_key = subflow->remote_key;
+		msk->local_key = subflow->local_key;
+		msk->connection_list = new_sock;
+	} else {
+		msk->subflow = new_sock;
+	}
+
+	return mp->sk;
+}
+
 static int mptcp_get_port(struct sock *sk, unsigned short snum)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -129,11 +168,16 @@ static int subflow_create(struct sock *sock)
 int mptcp_stream_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
-	struct socket *subflow = msk->subflow;
+	int err;
 
-	pr_debug("msk=%p, subflow=%p", msk, subflow->sk);
+	pr_debug("msk=%p", msk);
 
-	return inet_bind(subflow, uaddr, addr_len);
+	if (msk->subflow == NULL) {
+		err = subflow_create(sock->sk);
+		if (err)
+			return err;
+	}
+	return inet_bind(msk->subflow, uaddr, addr_len);
 }
 
 int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
@@ -153,12 +197,56 @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
 }
 
+int mptcp_stream_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
+{
+	struct mptcp_sock *msk = mptcp_sk(sock->sk);
+	struct socket *subflow;
+	int err = -EPERM;
+
+	if (msk->connection_list)
+		subflow = msk->connection_list;
+	else
+		subflow = msk->subflow;
+
+	err = inet_getname(subflow, uaddr, peer);
+
+	return err;
+}
+
+int mptcp_stream_listen(struct socket *sock, int backlog)
+{
+	struct mptcp_sock *msk = mptcp_sk(sock->sk);
+	int err;
+
+	pr_debug("msk=%p", msk);
+
+	if (msk->subflow == NULL) {
+		err = subflow_create(sock->sk);
+		if (err)
+			return err;
+	}
+	return inet_listen(msk->subflow, backlog);
+}
+
+int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags,
+			bool kern)
+{
+	struct mptcp_sock *msk = mptcp_sk(sock->sk);
+
+	pr_debug("msk=%p", msk);
+
+	if (msk->subflow == NULL) {
+		return -EINVAL;
+	}
+	return inet_accept(sock, newsock, flags, kern);
+}
+
 static struct proto mptcp_prot = {
 	.name		= "MPTCP",
 	.owner		= THIS_MODULE,
 	.init		= mptcp_init_sock,
 	.close		= mptcp_close,
-	.accept		= inet_csk_accept,
+	.accept		= mptcp_accept,
 	.shutdown	= tcp_shutdown,
 	.sendmsg	= mptcp_sendmsg,
 	.recvmsg	= mptcp_recvmsg,
@@ -176,11 +264,11 @@ const struct proto_ops mptcp_stream_ops = {
 	.bind		   = mptcp_stream_bind,
 	.connect	   = mptcp_stream_connect,
 	.socketpair	   = sock_no_socketpair,
-	.accept		   = inet_accept,
-	.getname	   = inet_getname,
+	.accept		   = mptcp_stream_accept,
+	.getname	   = mptcp_stream_getname,
 	.poll		   = tcp_poll,
 	.ioctl		   = inet_ioctl,
-	.listen		   = inet_listen,
+	.listen		   = mptcp_stream_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5e5fdcb3175f..89fcc3b746eb 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -53,6 +53,40 @@ static int subflow_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 }
 
+static void subflow_v4_init_req(struct request_sock *req,
+				const struct sock *sk_listener,
+				struct sk_buff *skb)
+{
+	struct subflow_request_sock *subflow_req = subflow_rsk(req);
+	struct subflow_sock *listener = subflow_sk(sk_listener);
+	struct tcp_options_received rx_opt;
+
+	tcp_rsk(req)->is_mptcp = 1;
+	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
+
+	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
+
+	rx_opt.mptcp.flags = 0;
+	rx_opt.mptcp.mp_capable = 0;
+	rx_opt.mptcp.mp_join = 0;
+	rx_opt.mptcp.dss = 0;
+	mptcp_get_options(skb, &rx_opt);
+
+	if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
+		subflow_req->mp_capable = 1;
+		if (rx_opt.mptcp.version >= listener->version)
+			subflow_req->version = listener->version;
+		else
+			subflow_req->version = rx_opt.mptcp.version;
+		if ((rx_opt.mptcp.flags & MPTCP_CAP_CHECKSUM_REQD) ||
+		    listener->checksum)
+			subflow_req->checksum = 1;
+		subflow_req->remote_key = rx_opt.mptcp.sndr_key;
+	} else {
+		subflow_req->mp_capable = 0;
+	}
+}
+
 static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 {
 	struct subflow_sock *subflow = subflow_sk(sk);
@@ -68,13 +102,66 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
+static struct request_sock_ops subflow_request_sock_ops;
+static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
+
+static int subflow_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);
+
+	pr_debug("subflow=%p", subflow);
+
+	/* Never answer to SYNs sent to broadcast or multicast */
+	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+		goto drop;
+
+	return tcp_conn_request(&subflow_request_sock_ops,
+				&subflow_request_sock_ipv4_ops,
+				sk, skb);
+drop:
+	tcp_listendrop(sk);
+	return 0;
+}
+
+static struct sock *subflow_syn_recv_sock(const struct sock *sk,
+					  struct sk_buff *skb,
+					  struct request_sock *req,
+					  struct dst_entry *dst,
+					  struct request_sock *req_unhash,
+					  bool *own_req)
+{
+	struct subflow_sock *listener = subflow_sk(sk);
+	struct subflow_request_sock *subflow_req = subflow_rsk(req);
+	struct sock *child;
+
+	pr_debug("listener=%p, req=%p, conn=%p", sk, req, listener->conn);
+
+	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
+
+	if (child) {
+		struct subflow_sock *subflow = subflow_sk(child);
+
+		pr_debug("child=%p", child);
+		if (subflow_req->mp_capable) {
+			subflow->mp_capable = 1;
+			subflow->fourth_ack = 1;
+			subflow->remote_key = subflow_req->remote_key;
+			subflow->local_key = subflow_req->local_key;
+		} else {
+			subflow->mp_capable = 0;
+		}
+	}
+
+	return child;
+}
+
 const struct inet_connection_sock_af_ops subflow_specific = {
 	.queue_xmit	   = ip_queue_xmit,
 	.send_check	   = tcp_v4_send_check,
 	.rebuild_header	   = inet_sk_rebuild_header,
 	.sk_rx_dst_set	   = subflow_finish_connect,
-	.conn_request	   = tcp_v4_conn_request,
-	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
+	.conn_request	   = subflow_conn_request,
+	.syn_recv_sock	   = subflow_syn_recv_sock,
 	.net_header_len	   = sizeof(struct iphdr),
 	.setsockopt	   = ip_setsockopt,
 	.getsockopt	   = ip_getsockopt,
@@ -112,6 +199,21 @@ static void subflow_close(struct sock *sk, long timeout)
 	tcp_close(sk, timeout);
 }
 
+static struct sock *subflow_accept(struct sock *sk, int flags, int *err,
+				   bool kern)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);
+	struct sock *child;
+
+	pr_debug("subflow=%p, conn=%p", subflow, subflow->conn);
+
+	child = inet_csk_accept(sk, flags, err, kern);
+
+	pr_debug("child=%p", child);
+
+	return child;
+}
+
 static void subflow_destroy(struct sock *sk)
 {
 	pr_debug("subflow=%p", sk);
@@ -125,7 +227,7 @@ static struct proto subflow_prot = {
 	.close		= subflow_close,
 	.connect	= subflow_connect,
 	.disconnect	= tcp_disconnect,
-	.accept		= inet_csk_accept,
+	.accept		= subflow_accept,
 	.ioctl		= tcp_ioctl,
 	.init		= subflow_init_sock,
 	.destroy	= subflow_destroy,
@@ -169,7 +271,14 @@ int mptcp_subflow_init(void)
 
 	/* TODO: Register path manager callbacks. */
 
+	subflow_request_sock_ops = tcp_request_sock_ops;
+	subflow_request_sock_ops.obj_size = sizeof(struct subflow_request_sock),
+
+	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
+	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
+
 	subflow_prot.twsk_prot		= tcp_prot.twsk_prot;
+	subflow_prot.rsk_prot		= &subflow_request_sock_ops;
 	subflow_prot.h.hashinfo		= tcp_prot.h.hashinfo;
 	err = proto_register(&subflow_prot, 1);
 	if (err)
-- 
2.20.0


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2018-12-14 22:27 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-14 22:27 [MPTCP] [RFC PATCH v5 08/17] mptcp: Create SUBFLOW socket for incoming connections Mat Martineau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.