All of lore.kernel.org
 help / color / mirror / Atom feed
* [MPTCP] [RFC PATCH 07/16] mptcp: Create SUBFLOW socket for outgoing connections
@ 2018-03-28 23:18 Mat Martineau
  0 siblings, 0 replies; only message in thread
From: Mat Martineau @ 2018-03-28 23:18 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 14987 bytes --]

From: Peter Krystad <peter.krystad(a)intel.com>

Override the bind() and connect() methods of the MPTCP
socket so that they act on the subflow socket, and use
the .sk_rx_dst_set() handler in the subflow's af_ops to
detect when the responding SYN-ACK is received.

Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
---
 include/net/mptcp.h  |  46 +++++++++++-
 net/mptcp/Makefile   |   2 +-
 net/mptcp/protocol.c | 151 ++++++++++++++++++++++++++++++++++++----
 net/mptcp/subflow.c  | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 376 insertions(+), 15 deletions(-)
 create mode 100644 net/mptcp/subflow.c

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6b4ae84ddf38..981d782cab9a 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -37,7 +37,10 @@
 struct mptcp_sock {
 	/* inet_connection_sock must be the first member */
 	struct	inet_connection_sock sk;
-	struct	socket *subflow;
+	u64	local_key;	/* kept in step with the subflow's local_key */
+	u64	remote_key;	/* copied from the subflow's remote_key on connect */
+	struct	socket *connection_list; /* @@ needs to be a list */
+	struct	socket *subflow; /* outgoing connect, listener or !mp_capable */
 };
 
 static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
@@ -45,6 +48,42 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
 	return (struct mptcp_sock *)sk;
 }
 
+/* MPTCP subflow sock structure */
+struct subflow_sock {
+	/* tcp_sock must be the first member */
+	struct	tcp_sock sk;
+	u64	local_key;	/* seeded from the parent mptcp_sock's local_key */
+	u64	remote_key;	/* read back by the parent once connected */
+	bool	request_mptcp;	/* send MP_CAPABLE */
+	bool	checksum;
+	u8	version;	/* a number, not a flag (4 bits in the request sock) */
+	bool	mp_capable;	/* remote is MPTCP capable */
+	bool	fourth_ack;	/* send initial DSS */
+	struct	sock *conn;	/* parent mptcp_sock */
+};
+
+static inline struct subflow_sock *subflow_sk(const struct sock *sk)
+{
+	return (struct subflow_sock *)sk; /* unchecked cast; also drops const */
+}
+
+struct subflow_request_sock {
+	struct	tcp_request_sock sk;	/* kept first: subflow_rsk() casts to this type */
+	u8	mp_capable : 1,
+		mp_join : 1,
+		checksum : 1,
+		backup : 1,
+		version : 4;	/* 1+1+1+1+4 bits: the u8 is fully used */
+	u64	local_key;
+	u64	remote_key;
+};
+
+static inline
+struct subflow_request_sock *subflow_rsk(const struct request_sock *rsk)
+{
+	return (struct subflow_request_sock *)rsk; /* unchecked cast */
+}
+
 #ifdef CONFIG_MPTCP
 
 void mptcp_parse_option(const unsigned char *ptr, int opsize,
@@ -53,6 +92,11 @@ unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
 unsigned int mptcp_synack_options(struct request_sock *req,
 				  u64 *local_key, u64 *remote_key);
 
+void mptcp_finish_connect(struct sock *sk, int mp_capable); /* protocol.c */
+
+int mptcp_subflow_init(void);	/* subflow.c */
+void mptcp_subflow_exit(void);	/* subflow.c */
+
 void mptcp_get_options(const struct sk_buff *skb,
 		       struct tcp_options_received *options);
 
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 2bd18e3b9fda..3f0e7163fe80 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_MPTCP) += mptcp.o
 
-mptcp-y := protocol.o options.o
+mptcp-y := protocol.o subflow.o options.o
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 393d214fe531..368854740333 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -20,11 +20,13 @@
 #include <net/inet_common.h>
 #include <net/inet_hashtables.h>
 #include <net/protocol.h>
+#include <net/tcp.h>
 #include <net/mptcp.h>
 
 static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct subflow_sock *subflow = subflow_sk(msk->subflow->sk);
 	int err;
 
 	saddr->sa_family = AF_INET;
@@ -32,18 +34,28 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
 	pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk);
 
 	err = kernel_connect(msk->subflow, saddr, len, 0);
-
-	sk->sk_state = TCP_ESTABLISHED;
-
+	pr_debug("mp_capable=%d", subflow->mp_capable);
+	if (!err) {
+		msk->remote_key = subflow->remote_key;
+		msk->local_key = subflow->local_key;
+		msk->connection_list = msk->subflow;
+		msk->subflow = NULL;
+	}
 	return err;
 }
 
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct socket *subflow = msk->subflow;
+	struct socket *subflow;	/* socket the data is actually sent on */
 
-	pr_debug("subflow=%p", subflow->sk);
+	if (msk->connection_list) { /* filled in by the connect paths */
+		subflow = msk->connection_list;
+		pr_debug("conn_list->subflow=%p", subflow->sk);
+	} else { /* otherwise use the initial subflow socket */
+		subflow = msk->subflow;
+		pr_debug("subflow=%p", subflow->sk);
+	}
 
 	return sock_sendmsg(subflow, msg);
 }
@@ -52,9 +64,15 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			 int nonblock, int flags, int *addr_len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct socket *subflow = msk->subflow;
+	struct socket *subflow;
 
-	pr_debug("subflow=%p", subflow->sk);
+	if (msk->connection_list) {
+		subflow = msk->connection_list;
+		pr_debug("conn_list->subflow=%p", subflow->sk);
+	} else {
+		subflow = msk->subflow;
+		pr_debug("subflow=%p", subflow->sk);
+	}
 
 	return sock_recvmsg(subflow, msg, flags);
 }
@@ -67,11 +85,19 @@ static int mptcp_init_sock(struct sock *sk)
 
 	pr_debug("msk=%p", msk);
 
-	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_SUBFLOW,
 			       &sf);
 	if (!err) {
-		pr_debug("subflow=%p", sf->sk);
+		struct subflow_sock *subflow = subflow_sk(sf->sk);
+
+		pr_debug("subflow=%p", subflow);
+		msk->local_key = 1234567887654321; // @@ fixed for now
 		msk->subflow = sf;
+		subflow->conn = sk;
+		subflow->request_mptcp = 1; // @@ if MPTCP enabled
+		subflow->checksum = 1; // @@ if checksum enabled
+		subflow->version = 0;
+		subflow->local_key = msk->local_key;
 	}
 
 	return err;
@@ -85,6 +111,60 @@ static void mptcp_close(struct sock *sk, long timeout)
 		pr_debug("subflow=%p", msk->subflow->sk);
 		sock_release(msk->subflow);
 	}
+
+	if (msk->connection_list) {
+		pr_debug("conn_list->subflow=%p", msk->connection_list->sk);
+		sock_release(msk->connection_list);
+	}
+}
+
+static int mptcp_get_port(struct sock *sk, unsigned short snum)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);	/* sk is the MPTCP socket */
+	struct sock *subflow = msk->subflow->sk;
+
+	pr_debug("msk=%p, subflow=%p", sk, subflow);
+	/* NOTE(review): msk->subflow is set to NULL by the connect paths. */
+	return inet_csk_get_port(subflow, snum); /* port goes to the subflow */
+}
+
+int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct mptcp_sock *msk = mptcp_sk(sock->sk);
+	struct socket *subflow = msk->connection_list ?: msk->subflow;
+
+	/* msk->subflow is NULL after connect; connection_list holds it then. */
+	pr_debug("msk=%p, subflow=%p", msk, subflow->sk);
+	return inet_bind(subflow, uaddr, addr_len); /* bind the TCP-level socket */
+}
+
+void mptcp_finish_connect(struct sock *sk, int mp_capable)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);	/* sk is the parent MPTCP sock */
+	struct subflow_sock *subflow = subflow_sk(msk->subflow->sk);
+
+	pr_debug("msk=%p", msk);
+	/* Only an MPTCP-capable peer moves the subflow onto connection_list. */
+	if (mp_capable) {
+		msk->remote_key = subflow->remote_key;
+		msk->local_key = subflow->local_key;
+		msk->connection_list = msk->subflow;
+		msk->subflow = NULL;
+	}
+	sk->sk_state = TCP_ESTABLISHED;	/* set whether or not mp_capable */
+}
+
+int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+			 int addr_len, int flags)
+{
+	struct mptcp_sock *msk = mptcp_sk(sock->sk);
+	struct socket *ssock = msk->connection_list ?: msk->subflow;
+
+	/* Connect the TCP-level socket. msk->subflow is NULL once an earlier
+	 * connect moved it to connection_list, so look there first.
+	 */
+	pr_debug("msk=%p, subflow=%p", msk, ssock->sk);
+	return inet_stream_connect(ssock, uaddr, addr_len, flags);
+}
 
 static struct proto mptcp_prot = {
@@ -99,35 +179,80 @@ static struct proto mptcp_prot = {
 	.recvmsg	= mptcp_recvmsg,
 	.hash		= inet_hash,
 	.unhash		= inet_unhash,
-	.get_port	= inet_csk_get_port,
+	.get_port	= mptcp_get_port,
 	.obj_size	= sizeof(struct mptcp_sock),
 	.no_autobind	= 1,
 };
 
+const struct proto_ops mptcp_stream_ops = {	/* socket ops for IPPROTO_MPTCP */
+	.family		   = PF_INET,
+	.owner		   = THIS_MODULE,
+	.release	   = inet_release,
+	.bind		   = mptcp_bind,	/* forwards to the subflow socket */
+	.connect	   = mptcp_stream_connect, /* forwards to the subflow socket */
+	.socketpair	   = sock_no_socketpair,
+	.accept		   = inet_accept,
+	.getname	   = inet_getname,
+	.poll		   = tcp_poll,	/* NOTE(review): sk is an mptcp_sock, not a tcp_sock - confirm */
+	.ioctl		   = inet_ioctl,
+	.listen		   = inet_listen,
+	.shutdown	   = inet_shutdown,
+	.setsockopt	   = sock_common_setsockopt,
+	.getsockopt	   = sock_common_getsockopt,
+	.sendmsg	   = inet_sendmsg,
+	.recvmsg	   = inet_recvmsg,
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = inet_sendpage,
+	.splice_read	   = tcp_splice_read,	/* NOTE(review): as for .poll */
+	.read_sock	   = tcp_read_sock,	/* NOTE(review): as for .poll */
+	.peek_len	   = tcp_peek_len,	/* NOTE(review): as for .poll */
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
+#endif
+};
+
 static struct inet_protosw mptcp_protosw = {
 	.type		= SOCK_STREAM,
 	.protocol	= IPPROTO_MPTCP,
 	.prot		= &mptcp_prot,
-	.ops		= &inet_stream_ops,
+	.ops		= &mptcp_stream_ops,	/* carries the bind/connect overrides */
+	.flags		= INET_PROTOSW_ICSK,
 };
 
 static int __init mptcp_init(void)
 {
 	int err;
 
-	err = proto_register(&mptcp_prot, 1);
+	mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;	/* reuse tcp_prot's hashinfo */
+
+	err = mptcp_subflow_init();	/* first: mptcp_init_sock() creates IPPROTO_SUBFLOW sockets */
 	if (err)
-		return err;
+		goto subflow_failed;
+
+	err = proto_register(&mptcp_prot, 1);
+	if (err) {
+		goto proto_failed;	/* undo mptcp_subflow_init() */
+	}
 
 	inet_register_protosw(&mptcp_protosw);
 
 	return 0;
+
+proto_failed:
+	mptcp_subflow_exit();
+
+subflow_failed:
+	return err;
 }
 
 static void __exit mptcp_exit(void)
 {
 	inet_unregister_protosw(&mptcp_protosw);
 	proto_unregister(&mptcp_prot);
+
+	mptcp_subflow_exit();	/* last: reverse of the order in mptcp_init() */
 }
 
 module_init(mptcp_init);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
new file mode 100644
index 000000000000..5e5fdcb3175f
--- /dev/null
+++ b/net/mptcp/subflow.c
@@ -0,0 +1,192 @@
+/*
+ * Multipath TCP
+ *
+ * Copyright (c) 2017, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <net/sock.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/mptcp.h>
+
+static int subflow_connect(struct sock *sk, struct sockaddr *saddr, int len)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);	/* only used for the log */
+
+	saddr->sa_family = AF_INET; // @@ presume IPv4 for now
+
+	pr_debug("subflow=%p", subflow);
+	/* The caller's sa_family was overwritten above; the rest is plain TCP. */
+	return tcp_v4_connect(sk, saddr, len);
+}
+
+static int subflow_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);	/* only used for the log */
+
+	pr_debug("subflow=%p", subflow);
+	/* Logging wrapper: the send itself is plain tcp_sendmsg(). */
+	return tcp_sendmsg(sk, msg, len);
+}
+
+static int subflow_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			   int nonblock, int flags, int *addr_len)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);	/* only used for the log */
+
+	pr_debug("subflow=%p", subflow);
+	/* Logging wrapper: every argument is passed to tcp_recvmsg() as is. */
+	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+}
+
+static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);
+
+	inet_sk_rx_dst_set(sk, skb);	/* do the actual rx dst update first */
+
+	pr_debug("subflow=%p", subflow);
+	/* conn is set by mptcp_init_sock(); clearing it makes this one-shot. */
+	if (subflow->conn) {
+		pr_debug("remote_key=%llu", subflow->remote_key);
+		mptcp_finish_connect(subflow->conn, subflow->mp_capable);
+		subflow->conn = NULL;
+	}
+}
+
+const struct inet_connection_sock_af_ops subflow_specific = { /* IPv4 only */
+	.queue_xmit	   = ip_queue_xmit,
+	.send_check	   = tcp_v4_send_check,
+	.rebuild_header	   = inet_sk_rebuild_header,
+	.sk_rx_dst_set	   = subflow_finish_connect, /* the one subflow-specific hook */
+	.conn_request	   = tcp_v4_conn_request,
+	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
+	.net_header_len	   = sizeof(struct iphdr),
+	.setsockopt	   = ip_setsockopt,
+	.getsockopt	   = ip_getsockopt,
+	.addr2sockaddr	   = inet_csk_addr2sockaddr,
+	.sockaddr_len	   = sizeof(struct sockaddr_in),
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_ip_setsockopt,
+	.compat_getsockopt = compat_ip_getsockopt,
+#endif
+	.mtu_reduced	   = tcp_v4_mtu_reduced,
+};
+
+static int subflow_init_sock(struct sock *sk)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);	/* only used for the log */
+	struct tcp_sock *tsk = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int err;
+
+	pr_debug("subflow=%p", subflow);
+	/* Run the IPv4 TCP init first; the MPTCP bits are added on success. */
+	err = tcp_v4_init_sock(sk);
+	if (!err) { // @@ AND mptcp is enabled
+		tsk->is_mptcp = 1;
+		icsk->icsk_af_ops = &subflow_specific;	/* adds subflow_finish_connect */
+	}
+
+	return err;
+}
+
+static void subflow_close(struct sock *sk, long timeout)
+{
+	pr_debug("subflow=%p", sk);
+	/* Logging wrapper around tcp_close(). */
+	tcp_close(sk, timeout);
+}
+
+static void subflow_destroy(struct sock *sk)
+{
+	pr_debug("subflow=%p", sk);
+	/* Logging wrapper around tcp_v4_destroy_sock(). */
+	tcp_v4_destroy_sock(sk);
+}
+
+static struct proto subflow_prot = {	/* struct proto behind IPPROTO_SUBFLOW */
+	.name		= "SUBFLOW",
+	.owner		= THIS_MODULE,
+	.close		= subflow_close,	/* logs, then tcp_close() */
+	.connect	= subflow_connect,	/* logs, then tcp_v4_connect() */
+	.disconnect	= tcp_disconnect,
+	.accept		= inet_csk_accept,
+	.ioctl		= tcp_ioctl,
+	.init		= subflow_init_sock,	/* tcp_v4_init_sock() + subflow af_ops */
+	.destroy	= subflow_destroy,	/* logs, then tcp_v4_destroy_sock() */
+	.shutdown	= tcp_shutdown,
+	.keepalive	= tcp_set_keepalive,
+	.recvmsg	= subflow_recvmsg,	/* logs, then tcp_recvmsg() */
+	.sendmsg	= subflow_sendmsg,	/* logs, then tcp_sendmsg() */
+	.sendpage	= tcp_sendpage,
+	.backlog_rcv	= tcp_v4_do_rcv,
+	.release_cb	= tcp_release_cb,
+	.hash		= inet_hash,
+	.unhash		= inet_unhash,
+	.get_port	= inet_csk_get_port,
+	.enter_memory_pressure	= tcp_enter_memory_pressure,
+	.stream_memory_free	= tcp_stream_memory_free,
+	.sockets_allocated	= &tcp_sockets_allocated,
+	.orphan_count		= &tcp_orphan_count,
+	.memory_allocated	= &tcp_memory_allocated,
+	.memory_pressure	= &tcp_memory_pressure,
+	.sysctl_mem		= sysctl_tcp_mem,
+	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
+	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
+	.max_header		= MAX_TCP_HEADER,
+	.obj_size		= sizeof(struct subflow_sock), /* tcp_sock + subflow fields */
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
+
+	.no_autobind		= true,
+};
+
+static struct inet_protosw subflow_protosw = {
+	.type		= SOCK_STREAM,
+	.protocol	= IPPROTO_SUBFLOW,	/* the protocol mptcp_init_sock() asks for */
+	.prot		= &subflow_prot,
+	.ops		= &inet_stream_ops,	/* unlike mptcp_protosw, no custom ops */
+	.flags		= INET_PROTOSW_ICSK,
+};
+
+int mptcp_subflow_init(void)
+{
+	int err = -ENOMEM;	/* dead value: overwritten by proto_register() */
+
+	/* TODO: Register path manager callbacks. */
+
+	subflow_prot.twsk_prot		= tcp_prot.twsk_prot;	/* copied from tcp_prot */
+	subflow_prot.h.hashinfo		= tcp_prot.h.hashinfo;	/* copied from tcp_prot */
+	err = proto_register(&subflow_prot, 1);
+	if (err)
+		goto fail;
+
+	inet_register_protosw(&subflow_protosw); /* expose it as IPPROTO_SUBFLOW */
+
+	return 0;
+
+fail:
+	return err;	/* nothing to unwind: the only failing step is the register */
+}
+
+void mptcp_subflow_exit(void)
+{
+	inet_unregister_protosw(&subflow_protosw); /* reverse of the init order */
+	proto_unregister(&subflow_prot);
+}
+
+MODULE_LICENSE("GPL");
-- 
2.16.3


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2018-03-28 23:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-28 23:18 [MPTCP] [RFC PATCH 07/16] mptcp: Create SUBFLOW socket for outgoing connections Mat Martineau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.