All of lore.kernel.org
 help / color / mirror / Atom feed
* [MPTCP] [RFC PATCH v3 03/16] mptcp: Add MPTCP socket stubs
@ 2018-10-05 22:59 Mat Martineau
  0 siblings, 0 replies; 6+ messages in thread
From: Mat Martineau @ 2018-10-05 22:59 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 7051 bytes --]

From: Peter Krystad <peter.krystad(a)intel.com>

Implements the infrastructure for MPTCP sockets.

MPTCP sockets open one in-kernel TCP socket per subflow. These subflow
sockets are only managed by the MPTCP socket that owns them and are not
visible from userspace. This commit allows a userspace program to open
an MPTCP socket with:

  sock = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

The resulting socket is simply a wrapper around a single regular TCP
socket, without any of the MPTCP protocol implemented over the wire.

Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
---
 include/net/mptcp.h  |  33 ++++++++++
 net/Kconfig          |   1 +
 net/Makefile         |   1 +
 net/mptcp/Kconfig    |  10 ++++
 net/mptcp/Makefile   |   3 +
 net/mptcp/protocol.c | 139 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 187 insertions(+)
 create mode 100644 include/net/mptcp.h
 create mode 100644 net/mptcp/Kconfig
 create mode 100644 net/mptcp/Makefile
 create mode 100644 net/mptcp/protocol.c

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
new file mode 100644
index 000000000000..7f7b18b000fe
--- /dev/null
+++ b/include/net/mptcp.h
@@ -0,0 +1,33 @@
+/*
+ * Multipath TCP
+ *
+ * Copyright (c) 2017, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __NET_MPTCP_H
+#define __NET_MPTCP_H
+
+#include <linux/tcp.h>
+
+/* MPTCP connection sock */
+struct mptcp_sock {
+	/* inet_connection_sock must be the first member */
+	struct	inet_connection_sock sk;
+	struct	socket *subflow;
+};
+
+static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
+{
+	return (struct mptcp_sock *)sk;
+}
+
+#endif /* __NET_MPTCP_H */
diff --git a/net/Kconfig b/net/Kconfig
index 228dfa382eec..274282e9b742 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -89,6 +89,7 @@ if INET
 source "net/ipv4/Kconfig"
 source "net/ipv6/Kconfig"
 source "net/netlabel/Kconfig"
+source "net/mptcp/Kconfig"
 
 endif # if INET
 
diff --git a/net/Makefile b/net/Makefile
index bdaf53925acd..1673aab222d8 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -87,3 +87,4 @@ endif
 obj-$(CONFIG_QRTR)		+= qrtr/
 obj-$(CONFIG_NET_NCSI)		+= ncsi/
 obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
+obj-$(CONFIG_MPTCP)		+= mptcp/
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
new file mode 100644
index 000000000000..8e48190e5fed
--- /dev/null
+++ b/net/mptcp/Kconfig
@@ -0,0 +1,10 @@
+
+config MPTCP
+	bool "Multipath TCP"
+	depends on INET
+	---help---
+	  Multipath TCP (MPTCP) connections send and receive data over multiple
+	  subflows in order to utilize multiple network paths. Each subflow
+	  uses the TCP protocol, and TCP options carry header information for
+	  MPTCP.
+
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
new file mode 100644
index 000000000000..5624e7d51d48
--- /dev/null
+++ b/net/mptcp/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MPTCP) += mptcp.o
+
+mptcp-y := protocol.o
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
new file mode 100644
index 000000000000..c1eb4afc3ca4
--- /dev/null
+++ b/net/mptcp/protocol.c
@@ -0,0 +1,139 @@
+/*
+ * Multipath TCP
+ *
+ * Copyright (c) 2017, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <net/sock.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/mptcp.h>
+
+static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *subflow = msk->subflow;
+
+	pr_debug("subflow=%p", subflow->sk);
+
+	return sock_sendmsg(subflow, msg);
+}
+
+static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			 int nonblock, int flags, int *addr_len)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *subflow = msk->subflow;
+
+	pr_debug("subflow=%p", subflow->sk);
+
+	return sock_recvmsg(subflow, msg, flags);
+}
+
+static int mptcp_init_sock(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *sf;
+	int err;
+
+	pr_debug("msk=%p", msk);
+
+	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+			       &sf);
+	if (!err) {
+		pr_debug("subflow=%p", sf->sk);
+		msk->subflow = sf;
+	}
+
+	return err;
+}
+
+static void mptcp_close(struct sock *sk, long timeout)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (msk->subflow) {
+		pr_debug("subflow=%p", msk->subflow->sk);
+		sock_release(msk->subflow);
+	}
+}
+
+static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	int err;
+
+	saddr->sa_family = AF_INET;
+
+	pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk);
+
+	err = kernel_connect(msk->subflow, saddr, len, 0);
+
+	sk->sk_state = TCP_ESTABLISHED;
+
+	return err;
+}
+
+static struct proto mptcp_prot = {
+	.name		= "MPTCP",
+	.owner		= THIS_MODULE,
+	.init		= mptcp_init_sock,
+	.close		= mptcp_close,
+	.accept		= inet_csk_accept,
+	.connect	= mptcp_connect,
+	.shutdown	= tcp_shutdown,
+	.sendmsg	= mptcp_sendmsg,
+	.recvmsg	= mptcp_recvmsg,
+	.hash		= inet_hash,
+	.unhash		= inet_unhash,
+	.get_port	= inet_csk_get_port,
+	.obj_size	= sizeof(struct mptcp_sock),
+	.no_autobind	= 1,
+};
+
+static struct inet_protosw mptcp_protosw = {
+	.type		= SOCK_STREAM,
+	.protocol	= IPPROTO_MPTCP,
+	.prot		= &mptcp_prot,
+	.ops		= &inet_stream_ops,
+};
+
+static int __init mptcp_init(void)
+{
+	int err;
+
+	err = proto_register(&mptcp_prot, 1);
+	if (err)
+		return err;
+
+	inet_register_protosw(&mptcp_protosw);
+
+	return 0;
+}
+
+static void __exit mptcp_exit(void)
+{
+	inet_unregister_protosw(&mptcp_protosw);
+	proto_unregister(&mptcp_prot);
+}
+
+module_init(mptcp_init);
+module_exit(mptcp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_MPTCP);
+MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_MPTCP);
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [MPTCP] [RFC PATCH v3 03/16] mptcp: Add MPTCP socket stubs
@ 2018-10-10  0:16 Mat Martineau
  0 siblings, 0 replies; 6+ messages in thread
From: Mat Martineau @ 2018-10-10  0:16 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 12083 bytes --]

On Mon, 8 Oct 2018, Christoph Paasch wrote:

> On 08/10/18 - 12:48:48, Mat Martineau wrote:
>>
>> On Mon, 8 Oct 2018, Christoph Paasch wrote:
>>
>>> On 05/10/18 - 15:59:05, Mat Martineau wrote:
>>>> From: Peter Krystad <peter.krystad(a)intel.com>
>>>>
>>>> Implements the infrastructure for MPTCP sockets.
>>>>
>>>> MPTCP sockets open one in-kernel TCP socket per subflow. These subflow
>>>> sockets are only managed by the MPTCP socket that owns them and are not
>>>> visible from userspace. This commit allows a userspace program to open
>>>> an MPTCP socket with:
>>>>
>>>>   sock = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
>>>
>>> Have you considered AF_MULTIPATH instead?
>>>
>>> Because, MPTCP does not really care about IP-address family. Thus, it might
>>> make sense here.
>>
>> Yes - but it's been a while since we discussed it.
>>
>> My first attempt at implementing the socket infrastructure did take the
>> AF_MULTIPATH approach, and I quickly learned a few things:
>>
>> 1. When I tried to write a test program using python's socket module, I
>> found that a lot of TCP-like behavior depended on using AF_INET/AF_INET6, so
>> nothing worked with a non-standard address family. It did, however, handle a
>> non-standard proto well. As I considered what this meant (just a python
>> quirk, or something bigger?), it did seem that the IPPROTO_MPTCP approach
>> would be easier to adapt to existing programs in general.
>
> That might be. We need to look into how a few programs are structured.
>
> The only one I really deeply looked at is ATS, and there I don't think it would be
> a big deal.
>
>> 2. The generic functionality provided by the kernel's af_inet infrastructure
>> is useful for MPTCP
>
> What do you mean by that? I mean, having a separate AF_* does not prevent us
> from using a separate struct proto_ops.
>
> Having AF_MULTIPATH even allows us to have our own struct net_proto_family
> which could be interesting if there are bits in inet_create that we don't
> need.

After digging through some code and old email, I think there was some 
quirk in the early implementations that allowed us to use IPPROTO_MPTCP 
for both the subflows and the MPTCP-level socket. As the code looks now, 
AF_INET doesn't buy us much any more. Please disregard :)

Thinking of similarities between AF_MULTIPATH and AF_KCM, they can both be 
thought of as socket types that are built on top of TCP sockets, but 
aren't directly layered on IP.

I did remember something else that works nicely with IPPROTO_MPTCP: that 
idea from the NetDev conference to add a BPF hook to make TCP connections 
in a given cgroup default to MPTCP.

>
>> 3. Various combinations of AF_INET, AF_INET6, and the IPV6_V6ONLY socket
>> option provide a way to control v4/v6 selection for the initial subflow.
>
> v4/v6 selection is already driven through the connect() address-family
> and/or bind() (on the listener side).
>
>> and whether later subflows may be mixed v4/v6.
>>
>> 4. MPTCP is a layer above IP and does have a natural place grouped with
>> TCP/UDP/UDP-Lite/SCTP/etc. under AF_INET/AF_INET6.
>
> Fair enough.
>
>> Some time later I found this post with a paragraph about AF_SMC (vs AF_INET
>> plus an option) that made some good points:
>>
>> https://lwn.net/Articles/723123/
>
> Yeah - seems like the biggest pain-point is that AF_SMC did not have
> IPv6-support baked in and it's more an afterthought.
>
>
> In the end, I think transitioning from AF_MULTIPATH to IPPROTO_MPTCP or the
> other way around won't be a big deal and it's a thing we can leave fairly
> open-ended. We should document the advantages/disadvantages of each.

I agree.


Thanks,

Mat


>
>>
>>
>> I can update the wiki design page if we're in agreement about this aspect of
>> the API. If we're not in agreement yet, let's work on that!
>>
>>
>> Mat
>>
>>
>>
>>>
>>>>
>>>> The resulting socket is simply a wrapper around a single regular TCP
>>>> socket, without any of the MPTCP protocol implemented over the wire.
>>>>
>>>> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
>>>> ---
>>>>  include/net/mptcp.h  |  33 ++++++++++
>>>>  net/Kconfig          |   1 +
>>>>  net/Makefile         |   1 +
>>>>  net/mptcp/Kconfig    |  10 ++++
>>>>  net/mptcp/Makefile   |   3 +
>>>>  net/mptcp/protocol.c | 139 +++++++++++++++++++++++++++++++++++++++++++
>>>>  6 files changed, 187 insertions(+)
>>>>  create mode 100644 include/net/mptcp.h
>>>>  create mode 100644 net/mptcp/Kconfig
>>>>  create mode 100644 net/mptcp/Makefile
>>>>  create mode 100644 net/mptcp/protocol.c
>>>>
>>>> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
>>>> new file mode 100644
>>>> index 000000000000..7f7b18b000fe
>>>> --- /dev/null
>>>> +++ b/include/net/mptcp.h
>>>> @@ -0,0 +1,33 @@
>>>> +/*
>>>> + * Multipath TCP
>>>> + *
>>>> + * Copyright (c) 2017, Intel Corporation.
>>>> + *
>>>> + * This program is free software; you can redistribute it and/or modify it
>>>> + * under the terms and conditions of the GNU General Public License,
>>>> + * version 2, as published by the Free Software Foundation.
>>>> + *
>>>> + * This program is distributed in the hope it will be useful, but WITHOUT
>>>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>>>> + * more details.
>>>> + */
>>>> +
>>>> +#ifndef __NET_MPTCP_H
>>>> +#define __NET_MPTCP_H
>>>> +
>>>> +#include <linux/tcp.h>
>>>> +
>>>> +/* MPTCP connection sock */
>>>> +struct mptcp_sock {
>>>> +	/* inet_connection_sock must be the first member */
>>>> +	struct	inet_connection_sock sk;
>>>> +	struct	socket *subflow;
>>>> +};
>>>> +
>>>> +static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
>>>> +{
>>>> +	return (struct mptcp_sock *)sk;
>>>> +}
>>>> +
>>>> +#endif /* __NET_MPTCP_H */
>>>> diff --git a/net/Kconfig b/net/Kconfig
>>>> index 228dfa382eec..274282e9b742 100644
>>>> --- a/net/Kconfig
>>>> +++ b/net/Kconfig
>>>> @@ -89,6 +89,7 @@ if INET
>>>>  source "net/ipv4/Kconfig"
>>>>  source "net/ipv6/Kconfig"
>>>>  source "net/netlabel/Kconfig"
>>>> +source "net/mptcp/Kconfig"
>>>>
>>>>  endif # if INET
>>>>
>>>> diff --git a/net/Makefile b/net/Makefile
>>>> index bdaf53925acd..1673aab222d8 100644
>>>> --- a/net/Makefile
>>>> +++ b/net/Makefile
>>>> @@ -87,3 +87,4 @@ endif
>>>>  obj-$(CONFIG_QRTR)		+= qrtr/
>>>>  obj-$(CONFIG_NET_NCSI)		+= ncsi/
>>>>  obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
>>>> +obj-$(CONFIG_MPTCP)		+= mptcp/
>>>> diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
>>>> new file mode 100644
>>>> index 000000000000..8e48190e5fed
>>>> --- /dev/null
>>>> +++ b/net/mptcp/Kconfig
>>>> @@ -0,0 +1,10 @@
>>>> +
>>>> +config MPTCP
>>>> +	bool "Multipath TCP"
>>>> +	depends on INET
>>>> +	---help---
>>>> +	  Multipath TCP (MPTCP) connections send and receive data over multiple
>>>> +	  subflows in order to utilize multiple network paths. Each subflow
>>>> +	  uses the TCP protocol, and TCP options carry header information for
>>>> +	  MPTCP.
>>>> +
>>>> diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
>>>> new file mode 100644
>>>> index 000000000000..5624e7d51d48
>>>> --- /dev/null
>>>> +++ b/net/mptcp/Makefile
>>>> @@ -0,0 +1,3 @@
>>>> +obj-$(CONFIG_MPTCP) += mptcp.o
>>>> +
>>>> +mptcp-y := protocol.o
>>>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>>>> new file mode 100644
>>>> index 000000000000..c1eb4afc3ca4
>>>> --- /dev/null
>>>> +++ b/net/mptcp/protocol.c
>>>> @@ -0,0 +1,139 @@
>>>> +/*
>>>> + * Multipath TCP
>>>> + *
>>>> + * Copyright (c) 2017, Intel Corporation.
>>>> + *
>>>> + * This program is free software; you can redistribute it and/or modify it
>>>> + * under the terms and conditions of the GNU General Public License,
>>>> + * version 2, as published by the Free Software Foundation.
>>>> + *
>>>> + * This program is distributed in the hope it will be useful, but WITHOUT
>>>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>>>> + * more details.
>>>> + */
>>>> +
>>>> +#include <linux/kernel.h>
>>>> +#include <linux/module.h>
>>>> +#include <linux/netdevice.h>
>>>> +#include <net/sock.h>
>>>> +#include <net/inet_common.h>
>>>> +#include <net/inet_hashtables.h>
>>>> +#include <net/protocol.h>
>>>> +#include <net/tcp.h>
>>>> +#include <net/mptcp.h>
>>>> +
>>>> +static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>>>> +{
>>>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>>>> +	struct socket *subflow = msk->subflow;
>>>> +
>>>> +	pr_debug("subflow=%p", subflow->sk);
>>>> +
>>>> +	return sock_sendmsg(subflow, msg);
>>>> +}
>>>> +
>>>> +static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>>>> +			 int nonblock, int flags, int *addr_len)
>>>> +{
>>>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>>>> +	struct socket *subflow = msk->subflow;
>>>> +
>>>> +	pr_debug("subflow=%p", subflow->sk);
>>>> +
>>>> +	return sock_recvmsg(subflow, msg, flags);
>>>> +}
>>>> +
>>>> +static int mptcp_init_sock(struct sock *sk)
>>>> +{
>>>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>>>> +	struct socket *sf;
>>>> +	int err;
>>>> +
>>>> +	pr_debug("msk=%p", msk);
>>>> +
>>>> +	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
>>>> +			       &sf);
>>>> +	if (!err) {
>>>> +		pr_debug("subflow=%p", sf->sk);
>>>> +		msk->subflow = sf;
>>>> +	}
>>>> +
>>>> +	return err;
>>>> +}
>>>> +
>>>> +static void mptcp_close(struct sock *sk, long timeout)
>>>> +{
>>>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>>>> +
>>>> +	if (msk->subflow) {
>>>> +		pr_debug("subflow=%p", msk->subflow->sk);
>>>> +		sock_release(msk->subflow);
>>>> +	}
>>>> +}
>>>> +
>>>> +static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
>>>> +{
>>>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>>>> +	int err;
>>>> +
>>>> +	saddr->sa_family = AF_INET;
>>>> +
>>>> +	pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk);
>>>> +
>>>> +	err = kernel_connect(msk->subflow, saddr, len, 0);
>>>> +
>>>> +	sk->sk_state = TCP_ESTABLISHED;
>>>> +
>>>> +	return err;
>>>> +}
>>>> +
>>>> +static struct proto mptcp_prot = {
>>>> +	.name		= "MPTCP",
>>>> +	.owner		= THIS_MODULE,
>>>> +	.init		= mptcp_init_sock,
>>>> +	.close		= mptcp_close,
>>>> +	.accept		= inet_csk_accept,
>>>> +	.connect	= mptcp_connect,
>>>> +	.shutdown	= tcp_shutdown,
>>>> +	.sendmsg	= mptcp_sendmsg,
>>>> +	.recvmsg	= mptcp_recvmsg,
>>>> +	.hash		= inet_hash,
>>>> +	.unhash		= inet_unhash,
>>>> +	.get_port	= inet_csk_get_port,
>>>> +	.obj_size	= sizeof(struct mptcp_sock),
>>>> +	.no_autobind	= 1,
>>>> +};
>>>> +
>>>> +static struct inet_protosw mptcp_protosw = {
>>>> +	.type		= SOCK_STREAM,
>>>> +	.protocol	= IPPROTO_MPTCP,
>>>> +	.prot		= &mptcp_prot,
>>>> +	.ops		= &inet_stream_ops,
>>>> +};
>>>> +
>>>> +static int __init mptcp_init(void)
>>>> +{
>>>> +	int err;
>>>> +
>>>> +	err = proto_register(&mptcp_prot, 1);
>>>> +	if (err)
>>>> +		return err;
>>>> +
>>>> +	inet_register_protosw(&mptcp_protosw);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void __exit mptcp_exit(void)
>>>> +{
>>>> +	inet_unregister_protosw(&mptcp_protosw);
>>>> +	proto_unregister(&mptcp_prot);
>>>> +}
>>>> +
>>>> +module_init(mptcp_init);
>>>> +module_exit(mptcp_exit);
>>>> +
>>>> +MODULE_LICENSE("GPL");
>>>> +MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_MPTCP);
>>>> +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_MPTCP);
>>>> --
>>>> 2.19.1
>>>>
>>>> _______________________________________________
>>>> mptcp mailing list
>>>> mptcp(a)lists.01.org
>>>> https://lists.01.org/mailman/listinfo/mptcp
>>>
>>
>> --
>> Mat Martineau
>> Intel OTC
>

--
Mat Martineau
Intel OTC

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [MPTCP] [RFC PATCH v3 03/16] mptcp: Add MPTCP socket stubs
@ 2018-10-08 20:47 Christoph Paasch
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Paasch @ 2018-10-08 20:47 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 11528 bytes --]

On 08/10/18 - 12:48:48, Mat Martineau wrote:
> 
> On Mon, 8 Oct 2018, Christoph Paasch wrote:
> 
> > On 05/10/18 - 15:59:05, Mat Martineau wrote:
> > > From: Peter Krystad <peter.krystad(a)intel.com>
> > > 
> > > Implements the infrastructure for MPTCP sockets.
> > > 
> > > MPTCP sockets open one in-kernel TCP socket per subflow. These subflow
> > > sockets are only managed by the MPTCP socket that owns them and are not
> > > visible from userspace. This commit allows a userspace program to open
> > > an MPTCP socket with:
> > > 
> > >   sock = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
> > 
> > Have you considered AF_MULTIPATH instead?
> > 
> > Because, MPTCP does not really care about IP-address family. Thus, it might
> > make sense here.
> 
> Yes - but it's been a while since we discussed it.
> 
> My first attempt at implementing the socket infrastructure did take the
> AF_MULTIPATH approach, and I quickly learned a few things:
> 
> 1. When I tried to write a test program using python's socket module, I
> found that a lot of TCP-like behavior depended on using AF_INET/AF_INET6, so
> nothing worked with a non-standard address family. It did, however, handle a
> non-standard proto well. As I considered what this meant (just a python
> quirk, or something bigger?), it did seem that the IPPROTO_MPTCP approach
> would be easier to adapt to existing programs in general.

That might be. We need to look into how a few programs are structured.

The only one I really deeply looked at is ATS, and there I don't think it would be
a big deal.

> 2. The generic functionality provided by the kernel's af_inet infrastructure
> is useful for MPTCP

What do you mean by that? I mean, having a separate AF_* does not prevent us
from using a separate struct proto_ops.

Having AF_MULTIPATH even allows us to have our own struct net_proto_family
which could be interesting if there are bits in inet_create that we don't
need.

> 3. Various combinations of AF_INET, AF_INET6, and the IPV6_V6ONLY socket
> option provide a way to control v4/v6 selection for the initial subflow.

v4/v6 selection is already driven through the connect() address-family
and/or bind() (on the listener side).

> and whether later subflows may be mixed v4/v6.
> 
> 4. MPTCP is a layer above IP and does have a natural place grouped with
> TCP/UDP/UDP-Lite/SCTP/etc. under AF_INET/AF_INET6.

Fair enough.

> Some time later I found this post with a paragraph about AF_SMC (vs AF_INET
> plus an option) that made some good points:
> 
> https://lwn.net/Articles/723123/

Yeah - seems like the biggest pain-point is that AF_SMC did not have
IPv6-support baked in and it's more an afterthought.


In the end, I think transitioning from AF_MULTIPATH to IPPROTO_MPTCP or the
other way around won't be a big deal and it's a thing we can leave fairly
open-ended. We should document the advantages/disadvantages of each.


Christoph

> 
> 
> I can update the wiki design page if we're in agreement about this aspect of
> the API. If we're not in agreement yet, let's work on that!
> 
> 
> Mat
> 
> 
> 
> > 
> > > 
> > > The resulting socket is simply a wrapper around a single regular TCP
> > > socket, without any of the MPTCP protocol implemented over the wire.
> > > 
> > > Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
> > > ---
> > >  include/net/mptcp.h  |  33 ++++++++++
> > >  net/Kconfig          |   1 +
> > >  net/Makefile         |   1 +
> > >  net/mptcp/Kconfig    |  10 ++++
> > >  net/mptcp/Makefile   |   3 +
> > >  net/mptcp/protocol.c | 139 +++++++++++++++++++++++++++++++++++++++++++
> > >  6 files changed, 187 insertions(+)
> > >  create mode 100644 include/net/mptcp.h
> > >  create mode 100644 net/mptcp/Kconfig
> > >  create mode 100644 net/mptcp/Makefile
> > >  create mode 100644 net/mptcp/protocol.c
> > > 
> > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > > new file mode 100644
> > > index 000000000000..7f7b18b000fe
> > > --- /dev/null
> > > +++ b/include/net/mptcp.h
> > > @@ -0,0 +1,33 @@
> > > +/*
> > > + * Multipath TCP
> > > + *
> > > + * Copyright (c) 2017, Intel Corporation.
> > > + *
> > > + * This program is free software; you can redistribute it and/or modify it
> > > + * under the terms and conditions of the GNU General Public License,
> > > + * version 2, as published by the Free Software Foundation.
> > > + *
> > > + * This program is distributed in the hope it will be useful, but WITHOUT
> > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > > + * more details.
> > > + */
> > > +
> > > +#ifndef __NET_MPTCP_H
> > > +#define __NET_MPTCP_H
> > > +
> > > +#include <linux/tcp.h>
> > > +
> > > +/* MPTCP connection sock */
> > > +struct mptcp_sock {
> > > +	/* inet_connection_sock must be the first member */
> > > +	struct	inet_connection_sock sk;
> > > +	struct	socket *subflow;
> > > +};
> > > +
> > > +static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
> > > +{
> > > +	return (struct mptcp_sock *)sk;
> > > +}
> > > +
> > > +#endif /* __NET_MPTCP_H */
> > > diff --git a/net/Kconfig b/net/Kconfig
> > > index 228dfa382eec..274282e9b742 100644
> > > --- a/net/Kconfig
> > > +++ b/net/Kconfig
> > > @@ -89,6 +89,7 @@ if INET
> > >  source "net/ipv4/Kconfig"
> > >  source "net/ipv6/Kconfig"
> > >  source "net/netlabel/Kconfig"
> > > +source "net/mptcp/Kconfig"
> > > 
> > >  endif # if INET
> > > 
> > > diff --git a/net/Makefile b/net/Makefile
> > > index bdaf53925acd..1673aab222d8 100644
> > > --- a/net/Makefile
> > > +++ b/net/Makefile
> > > @@ -87,3 +87,4 @@ endif
> > >  obj-$(CONFIG_QRTR)		+= qrtr/
> > >  obj-$(CONFIG_NET_NCSI)		+= ncsi/
> > >  obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
> > > +obj-$(CONFIG_MPTCP)		+= mptcp/
> > > diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
> > > new file mode 100644
> > > index 000000000000..8e48190e5fed
> > > --- /dev/null
> > > +++ b/net/mptcp/Kconfig
> > > @@ -0,0 +1,10 @@
> > > +
> > > +config MPTCP
> > > +	bool "Multipath TCP"
> > > +	depends on INET
> > > +	---help---
> > > +	  Multipath TCP (MPTCP) connections send and receive data over multiple
> > > +	  subflows in order to utilize multiple network paths. Each subflow
> > > +	  uses the TCP protocol, and TCP options carry header information for
> > > +	  MPTCP.
> > > +
> > > diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
> > > new file mode 100644
> > > index 000000000000..5624e7d51d48
> > > --- /dev/null
> > > +++ b/net/mptcp/Makefile
> > > @@ -0,0 +1,3 @@
> > > +obj-$(CONFIG_MPTCP) += mptcp.o
> > > +
> > > +mptcp-y := protocol.o
> > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > > new file mode 100644
> > > index 000000000000..c1eb4afc3ca4
> > > --- /dev/null
> > > +++ b/net/mptcp/protocol.c
> > > @@ -0,0 +1,139 @@
> > > +/*
> > > + * Multipath TCP
> > > + *
> > > + * Copyright (c) 2017, Intel Corporation.
> > > + *
> > > + * This program is free software; you can redistribute it and/or modify it
> > > + * under the terms and conditions of the GNU General Public License,
> > > + * version 2, as published by the Free Software Foundation.
> > > + *
> > > + * This program is distributed in the hope it will be useful, but WITHOUT
> > > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > > + * more details.
> > > + */
> > > +
> > > +#include <linux/kernel.h>
> > > +#include <linux/module.h>
> > > +#include <linux/netdevice.h>
> > > +#include <net/sock.h>
> > > +#include <net/inet_common.h>
> > > +#include <net/inet_hashtables.h>
> > > +#include <net/protocol.h>
> > > +#include <net/tcp.h>
> > > +#include <net/mptcp.h>
> > > +
> > > +static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > > +	struct socket *subflow = msk->subflow;
> > > +
> > > +	pr_debug("subflow=%p", subflow->sk);
> > > +
> > > +	return sock_sendmsg(subflow, msg);
> > > +}
> > > +
> > > +static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> > > +			 int nonblock, int flags, int *addr_len)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > > +	struct socket *subflow = msk->subflow;
> > > +
> > > +	pr_debug("subflow=%p", subflow->sk);
> > > +
> > > +	return sock_recvmsg(subflow, msg, flags);
> > > +}
> > > +
> > > +static int mptcp_init_sock(struct sock *sk)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > > +	struct socket *sf;
> > > +	int err;
> > > +
> > > +	pr_debug("msk=%p", msk);
> > > +
> > > +	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
> > > +			       &sf);
> > > +	if (!err) {
> > > +		pr_debug("subflow=%p", sf->sk);
> > > +		msk->subflow = sf;
> > > +	}
> > > +
> > > +	return err;
> > > +}
> > > +
> > > +static void mptcp_close(struct sock *sk, long timeout)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > > +
> > > +	if (msk->subflow) {
> > > +		pr_debug("subflow=%p", msk->subflow->sk);
> > > +		sock_release(msk->subflow);
> > > +	}
> > > +}
> > > +
> > > +static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > > +	int err;
> > > +
> > > +	saddr->sa_family = AF_INET;
> > > +
> > > +	pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk);
> > > +
> > > +	err = kernel_connect(msk->subflow, saddr, len, 0);
> > > +
> > > +	sk->sk_state = TCP_ESTABLISHED;
> > > +
> > > +	return err;
> > > +}
> > > +
> > > +static struct proto mptcp_prot = {
> > > +	.name		= "MPTCP",
> > > +	.owner		= THIS_MODULE,
> > > +	.init		= mptcp_init_sock,
> > > +	.close		= mptcp_close,
> > > +	.accept		= inet_csk_accept,
> > > +	.connect	= mptcp_connect,
> > > +	.shutdown	= tcp_shutdown,
> > > +	.sendmsg	= mptcp_sendmsg,
> > > +	.recvmsg	= mptcp_recvmsg,
> > > +	.hash		= inet_hash,
> > > +	.unhash		= inet_unhash,
> > > +	.get_port	= inet_csk_get_port,
> > > +	.obj_size	= sizeof(struct mptcp_sock),
> > > +	.no_autobind	= 1,
> > > +};
> > > +
> > > +static struct inet_protosw mptcp_protosw = {
> > > +	.type		= SOCK_STREAM,
> > > +	.protocol	= IPPROTO_MPTCP,
> > > +	.prot		= &mptcp_prot,
> > > +	.ops		= &inet_stream_ops,
> > > +};
> > > +
> > > +static int __init mptcp_init(void)
> > > +{
> > > +	int err;
> > > +
> > > +	err = proto_register(&mptcp_prot, 1);
> > > +	if (err)
> > > +		return err;
> > > +
> > > +	inet_register_protosw(&mptcp_protosw);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static void __exit mptcp_exit(void)
> > > +{
> > > +	inet_unregister_protosw(&mptcp_protosw);
> > > +	proto_unregister(&mptcp_prot);
> > > +}
> > > +
> > > +module_init(mptcp_init);
> > > +module_exit(mptcp_exit);
> > > +
> > > +MODULE_LICENSE("GPL");
> > > +MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_MPTCP);
> > > +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_MPTCP);
> > > --
> > > 2.19.1
> > > 
> > > _______________________________________________
> > > mptcp mailing list
> > > mptcp(a)lists.01.org
> > > https://lists.01.org/mailman/listinfo/mptcp
> > 
> 
> --
> Mat Martineau
> Intel OTC

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [MPTCP] [RFC PATCH v3 03/16] mptcp: Add MPTCP socket stubs
@ 2018-10-08 20:12 Krystad, Peter
  0 siblings, 0 replies; 6+ messages in thread
From: Krystad, Peter @ 2018-10-08 20:12 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 8728 bytes --]


On Mon, 2018-10-08 at 11:11 -0700, Christoph Paasch wrote:
> On 05/10/18 - 15:59:05, Mat Martineau wrote:
> > From: Peter Krystad <peter.krystad(a)intel.com>
> > 
> > Implements the infrastructure for MPTCP sockets.
> > 
> > MPTCP sockets open one in-kernel TCP socket per subflow. These subflow
> > sockets are only managed by the MPTCP socket that owns them and are not
> > visible from userspace. This commit allows a userspace program to open
> > an MPTCP socket with:
> > 
> >   sock = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
> 
> Have you considered AF_MULTIPATH instead?

No. But that is a good suggestion.

Peter.


> Because, MPTCP does not really care about IP-address family. Thus, it might
> make sense here.
> 
> 
> Christoph
> 
> > 
> > The resulting socket is simply a wrapper around a single regular TCP
> > socket, without any of the MPTCP protocol implemented over the wire.
> > 
> > Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
> > ---
> >  include/net/mptcp.h  |  33 ++++++++++
> >  net/Kconfig          |   1 +
> >  net/Makefile         |   1 +
> >  net/mptcp/Kconfig    |  10 ++++
> >  net/mptcp/Makefile   |   3 +
> >  net/mptcp/protocol.c | 139 +++++++++++++++++++++++++++++++++++++++++++
> >  6 files changed, 187 insertions(+)
> >  create mode 100644 include/net/mptcp.h
> >  create mode 100644 net/mptcp/Kconfig
> >  create mode 100644 net/mptcp/Makefile
> >  create mode 100644 net/mptcp/protocol.c
> > 
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > new file mode 100644
> > index 000000000000..7f7b18b000fe
> > --- /dev/null
> > +++ b/include/net/mptcp.h
> > @@ -0,0 +1,33 @@
> > +/*
> > + * Multipath TCP
> > + *
> > + * Copyright (c) 2017, Intel Corporation.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU General Public License,
> > + * version 2, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > + * more details.
> > + */
> > +
> > +#ifndef __NET_MPTCP_H
> > +#define __NET_MPTCP_H
> > +
> > +#include <linux/tcp.h>
> > +
> > +/* MPTCP connection sock */
> > +struct mptcp_sock {
> > +	/* inet_connection_sock must be the first member */
> > +	struct	inet_connection_sock sk;
> > +	struct	socket *subflow;
> > +};
> > +
> > +static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
> > +{
> > +	return (struct mptcp_sock *)sk;
> > +}
> > +
> > +#endif /* __NET_MPTCP_H */
> > diff --git a/net/Kconfig b/net/Kconfig
> > index 228dfa382eec..274282e9b742 100644
> > --- a/net/Kconfig
> > +++ b/net/Kconfig
> > @@ -89,6 +89,7 @@ if INET
> >  source "net/ipv4/Kconfig"
> >  source "net/ipv6/Kconfig"
> >  source "net/netlabel/Kconfig"
> > +source "net/mptcp/Kconfig"
> >  
> >  endif # if INET
> >  
> > diff --git a/net/Makefile b/net/Makefile
> > index bdaf53925acd..1673aab222d8 100644
> > --- a/net/Makefile
> > +++ b/net/Makefile
> > @@ -87,3 +87,4 @@ endif
> >  obj-$(CONFIG_QRTR)		+= qrtr/
> >  obj-$(CONFIG_NET_NCSI)		+= ncsi/
> >  obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
> > +obj-$(CONFIG_MPTCP)		+= mptcp/
> > diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
> > new file mode 100644
> > index 000000000000..8e48190e5fed
> > --- /dev/null
> > +++ b/net/mptcp/Kconfig
> > @@ -0,0 +1,10 @@
> > +
> > +config MPTCP
> > +	bool "Multipath TCP"
> > +	depends on INET
> > +	---help---
> > +	  Multipath TCP (MPTCP) connections send and receive data over multiple
> > +	  subflows in order to utilize multiple network paths. Each subflow
> > +	  uses the TCP protocol, and TCP options carry header information for
> > +	  MPTCP.
> > +
> > diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
> > new file mode 100644
> > index 000000000000..5624e7d51d48
> > --- /dev/null
> > +++ b/net/mptcp/Makefile
> > @@ -0,0 +1,3 @@
> > +obj-$(CONFIG_MPTCP) += mptcp.o
> > +
> > +mptcp-y := protocol.o
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > new file mode 100644
> > index 000000000000..c1eb4afc3ca4
> > --- /dev/null
> > +++ b/net/mptcp/protocol.c
> > @@ -0,0 +1,139 @@
> > +/*
> > + * Multipath TCP
> > + *
> > + * Copyright (c) 2017, Intel Corporation.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU General Public License,
> > + * version 2, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > + * more details.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/netdevice.h>
> > +#include <net/sock.h>
> > +#include <net/inet_common.h>
> > +#include <net/inet_hashtables.h>
> > +#include <net/protocol.h>
> > +#include <net/tcp.h>
> > +#include <net/mptcp.h>
> > +
> > +static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	struct socket *subflow = msk->subflow;
> > +
> > +	pr_debug("subflow=%p", subflow->sk);
> > +
> > +	return sock_sendmsg(subflow, msg);
> > +}
> > +
> > +static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> > +			 int nonblock, int flags, int *addr_len)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	struct socket *subflow = msk->subflow;
> > +
> > +	pr_debug("subflow=%p", subflow->sk);
> > +
> > +	return sock_recvmsg(subflow, msg, flags);
> > +}
> > +
> > +static int mptcp_init_sock(struct sock *sk)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	struct socket *sf;
> > +	int err;
> > +
> > +	pr_debug("msk=%p", msk);
> > +
> > +	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
> > +			       &sf);
> > +	if (!err) {
> > +		pr_debug("subflow=%p", sf->sk);
> > +		msk->subflow = sf;
> > +	}
> > +
> > +	return err;
> > +}
> > +
> > +static void mptcp_close(struct sock *sk, long timeout)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +
> > +	if (msk->subflow) {
> > +		pr_debug("subflow=%p", msk->subflow->sk);
> > +		sock_release(msk->subflow);
> > +	}
> > +}
> > +
> > +static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	int err;
> > +
> > +	saddr->sa_family = AF_INET;
> > +
> > +	pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk);
> > +
> > +	err = kernel_connect(msk->subflow, saddr, len, 0);
> > +
> > +	sk->sk_state = TCP_ESTABLISHED;
> > +
> > +	return err;
> > +}
> > +
> > +static struct proto mptcp_prot = {
> > +	.name		= "MPTCP",
> > +	.owner		= THIS_MODULE,
> > +	.init		= mptcp_init_sock,
> > +	.close		= mptcp_close,
> > +	.accept		= inet_csk_accept,
> > +	.connect	= mptcp_connect,
> > +	.shutdown	= tcp_shutdown,
> > +	.sendmsg	= mptcp_sendmsg,
> > +	.recvmsg	= mptcp_recvmsg,
> > +	.hash		= inet_hash,
> > +	.unhash		= inet_unhash,
> > +	.get_port	= inet_csk_get_port,
> > +	.obj_size	= sizeof(struct mptcp_sock),
> > +	.no_autobind	= 1,
> > +};
> > +
> > +static struct inet_protosw mptcp_protosw = {
> > +	.type		= SOCK_STREAM,
> > +	.protocol	= IPPROTO_MPTCP,
> > +	.prot		= &mptcp_prot,
> > +	.ops		= &inet_stream_ops,
> > +};
> > +
> > +static int __init mptcp_init(void)
> > +{
> > +	int err;
> > +
> > +	err = proto_register(&mptcp_prot, 1);
> > +	if (err)
> > +		return err;
> > +
> > +	inet_register_protosw(&mptcp_protosw);
> > +
> > +	return 0;
> > +}
> > +
> > +static void __exit mptcp_exit(void)
> > +{
> > +	inet_unregister_protosw(&mptcp_protosw);
> > +	proto_unregister(&mptcp_prot);
> > +}
> > +
> > +module_init(mptcp_init);
> > +module_exit(mptcp_exit);
> > +
> > +MODULE_LICENSE("GPL");
> > +MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_MPTCP);
> > +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_MPTCP);
> > -- 
> > 2.19.1
> > 
> > _______________________________________________
> > mptcp mailing list
> > mptcp(a)lists.01.org
> > https://lists.01.org/mailman/listinfo/mptcp
> 
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [MPTCP] [RFC PATCH v3 03/16] mptcp: Add MPTCP socket stubs
@ 2018-10-08 19:48 Mat Martineau
  0 siblings, 0 replies; 6+ messages in thread
From: Mat Martineau @ 2018-10-08 19:48 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 9637 bytes --]


On Mon, 8 Oct 2018, Christoph Paasch wrote:

> On 05/10/18 - 15:59:05, Mat Martineau wrote:
>> From: Peter Krystad <peter.krystad(a)intel.com>
>>
>> Implements the infrastructure for MPTCP sockets.
>>
>> MPTCP sockets open one in-kernel TCP socket per subflow. These subflow
>> sockets are only managed by the MPTCP socket that owns them and are not
>> visible from userspace. This commit allows a userspace program to open
>> an MPTCP socket with:
>>
>>   sock = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
>
> Have you considered AF_MULTIPATH instead?
>
> Because, MPTCP does not really care about IP-address family. Thus, it might
> make sense here.

Yes - but it's been a while since we discussed it.

My first attempt at implementing the socket infrastructure did take the 
AF_MULTIPATH approach, and I quickly learned a few things:

1. When I tried to write a test program using Python's socket module, I 
found that a lot of TCP-like behavior depended on using AF_INET/AF_INET6, 
so nothing worked with a non-standard address family. It did, however, 
handle a non-standard protocol well. As I considered what this meant (just 
a Python quirk, or something bigger?), it did seem that the IPPROTO_MPTCP 
approach would be easier to adapt to existing programs in general.

2. The generic functionality provided by the kernel's af_inet 
infrastructure is useful for MPTCP.

3. Various combinations of AF_INET, AF_INET6, and the IPV6_V6ONLY socket 
option provide a way to control v4/v6 selection for the initial subflow 
and whether later subflows may be mixed v4/v6.

4. MPTCP is a layer above IP and does have a natural place grouped with 
TCP/UDP/UDP-Lite/SCTP/etc. under AF_INET/AF_INET6.

Some time later I found this post with a paragraph about AF_SMC (vs 
AF_INET plus an option) that made some good points:

https://lwn.net/Articles/723123/


I can update the wiki design page if we're in agreement about this aspect 
of the API. If we're not in agreement yet, let's work on that!


Mat



>
>>
>> The resulting socket is simply a wrapper around a single regular TCP
>> socket, without any of the MPTCP protocol implemented over the wire.
>>
>> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
>> ---
>>  include/net/mptcp.h  |  33 ++++++++++
>>  net/Kconfig          |   1 +
>>  net/Makefile         |   1 +
>>  net/mptcp/Kconfig    |  10 ++++
>>  net/mptcp/Makefile   |   3 +
>>  net/mptcp/protocol.c | 139 +++++++++++++++++++++++++++++++++++++++++++
>>  6 files changed, 187 insertions(+)
>>  create mode 100644 include/net/mptcp.h
>>  create mode 100644 net/mptcp/Kconfig
>>  create mode 100644 net/mptcp/Makefile
>>  create mode 100644 net/mptcp/protocol.c
>>
>> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
>> new file mode 100644
>> index 000000000000..7f7b18b000fe
>> --- /dev/null
>> +++ b/include/net/mptcp.h
>> @@ -0,0 +1,33 @@
>> +/*
>> + * Multipath TCP
>> + *
>> + * Copyright (c) 2017, Intel Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + */
>> +
>> +#ifndef __NET_MPTCP_H
>> +#define __NET_MPTCP_H
>> +
>> +#include <linux/tcp.h>
>> +
>> +/* MPTCP connection sock */
>> +struct mptcp_sock {
>> +	/* inet_connection_sock must be the first member */
>> +	struct	inet_connection_sock sk;
>> +	struct	socket *subflow;
>> +};
>> +
>> +static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
>> +{
>> +	return (struct mptcp_sock *)sk;
>> +}
>> +
>> +#endif /* __NET_MPTCP_H */
>> diff --git a/net/Kconfig b/net/Kconfig
>> index 228dfa382eec..274282e9b742 100644
>> --- a/net/Kconfig
>> +++ b/net/Kconfig
>> @@ -89,6 +89,7 @@ if INET
>>  source "net/ipv4/Kconfig"
>>  source "net/ipv6/Kconfig"
>>  source "net/netlabel/Kconfig"
>> +source "net/mptcp/Kconfig"
>>
>>  endif # if INET
>>
>> diff --git a/net/Makefile b/net/Makefile
>> index bdaf53925acd..1673aab222d8 100644
>> --- a/net/Makefile
>> +++ b/net/Makefile
>> @@ -87,3 +87,4 @@ endif
>>  obj-$(CONFIG_QRTR)		+= qrtr/
>>  obj-$(CONFIG_NET_NCSI)		+= ncsi/
>>  obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
>> +obj-$(CONFIG_MPTCP)		+= mptcp/
>> diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
>> new file mode 100644
>> index 000000000000..8e48190e5fed
>> --- /dev/null
>> +++ b/net/mptcp/Kconfig
>> @@ -0,0 +1,10 @@
>> +
>> +config MPTCP
>> +	bool "Multipath TCP"
>> +	depends on INET
>> +	---help---
>> +	  Multipath TCP (MPTCP) connections send and receive data over multiple
>> +	  subflows in order to utilize multiple network paths. Each subflow
>> +	  uses the TCP protocol, and TCP options carry header information for
>> +	  MPTCP.
>> +
>> diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
>> new file mode 100644
>> index 000000000000..5624e7d51d48
>> --- /dev/null
>> +++ b/net/mptcp/Makefile
>> @@ -0,0 +1,3 @@
>> +obj-$(CONFIG_MPTCP) += mptcp.o
>> +
>> +mptcp-y := protocol.o
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> new file mode 100644
>> index 000000000000..c1eb4afc3ca4
>> --- /dev/null
>> +++ b/net/mptcp/protocol.c
>> @@ -0,0 +1,139 @@
>> +/*
>> + * Multipath TCP
>> + *
>> + * Copyright (c) 2017, Intel Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/module.h>
>> +#include <linux/netdevice.h>
>> +#include <net/sock.h>
>> +#include <net/inet_common.h>
>> +#include <net/inet_hashtables.h>
>> +#include <net/protocol.h>
>> +#include <net/tcp.h>
>> +#include <net/mptcp.h>
>> +
>> +static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>> +{
>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>> +	struct socket *subflow = msk->subflow;
>> +
>> +	pr_debug("subflow=%p", subflow->sk);
>> +
>> +	return sock_sendmsg(subflow, msg);
>> +}
>> +
>> +static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>> +			 int nonblock, int flags, int *addr_len)
>> +{
>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>> +	struct socket *subflow = msk->subflow;
>> +
>> +	pr_debug("subflow=%p", subflow->sk);
>> +
>> +	return sock_recvmsg(subflow, msg, flags);
>> +}
>> +
>> +static int mptcp_init_sock(struct sock *sk)
>> +{
>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>> +	struct socket *sf;
>> +	int err;
>> +
>> +	pr_debug("msk=%p", msk);
>> +
>> +	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
>> +			       &sf);
>> +	if (!err) {
>> +		pr_debug("subflow=%p", sf->sk);
>> +		msk->subflow = sf;
>> +	}
>> +
>> +	return err;
>> +}
>> +
>> +static void mptcp_close(struct sock *sk, long timeout)
>> +{
>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>> +
>> +	if (msk->subflow) {
>> +		pr_debug("subflow=%p", msk->subflow->sk);
>> +		sock_release(msk->subflow);
>> +	}
>> +}
>> +
>> +static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
>> +{
>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>> +	int err;
>> +
>> +	saddr->sa_family = AF_INET;
>> +
>> +	pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk);
>> +
>> +	err = kernel_connect(msk->subflow, saddr, len, 0);
>> +
>> +	sk->sk_state = TCP_ESTABLISHED;
>> +
>> +	return err;
>> +}
>> +
>> +static struct proto mptcp_prot = {
>> +	.name		= "MPTCP",
>> +	.owner		= THIS_MODULE,
>> +	.init		= mptcp_init_sock,
>> +	.close		= mptcp_close,
>> +	.accept		= inet_csk_accept,
>> +	.connect	= mptcp_connect,
>> +	.shutdown	= tcp_shutdown,
>> +	.sendmsg	= mptcp_sendmsg,
>> +	.recvmsg	= mptcp_recvmsg,
>> +	.hash		= inet_hash,
>> +	.unhash		= inet_unhash,
>> +	.get_port	= inet_csk_get_port,
>> +	.obj_size	= sizeof(struct mptcp_sock),
>> +	.no_autobind	= 1,
>> +};
>> +
>> +static struct inet_protosw mptcp_protosw = {
>> +	.type		= SOCK_STREAM,
>> +	.protocol	= IPPROTO_MPTCP,
>> +	.prot		= &mptcp_prot,
>> +	.ops		= &inet_stream_ops,
>> +};
>> +
>> +static int __init mptcp_init(void)
>> +{
>> +	int err;
>> +
>> +	err = proto_register(&mptcp_prot, 1);
>> +	if (err)
>> +		return err;
>> +
>> +	inet_register_protosw(&mptcp_protosw);
>> +
>> +	return 0;
>> +}
>> +
>> +static void __exit mptcp_exit(void)
>> +{
>> +	inet_unregister_protosw(&mptcp_protosw);
>> +	proto_unregister(&mptcp_prot);
>> +}
>> +
>> +module_init(mptcp_init);
>> +module_exit(mptcp_exit);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_MPTCP);
>> +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_MPTCP);
>> --
>> 2.19.1
>>
>> _______________________________________________
>> mptcp mailing list
>> mptcp(a)lists.01.org
>> https://lists.01.org/mailman/listinfo/mptcp
>

--
Mat Martineau
Intel OTC

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [MPTCP] [RFC PATCH v3 03/16] mptcp: Add MPTCP socket stubs
@ 2018-10-08 18:11 Christoph Paasch
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Paasch @ 2018-10-08 18:11 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 7922 bytes --]

On 05/10/18 - 15:59:05, Mat Martineau wrote:
> From: Peter Krystad <peter.krystad(a)intel.com>
> 
> Implements the infrastructure for MPTCP sockets.
> 
> MPTCP sockets open one in-kernel TCP socket per subflow. These subflow
> sockets are only managed by the MPTCP socket that owns them and are not
> visible from userspace. This commit allows a userspace program to open
> an MPTCP socket with:
> 
>   sock = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

Have you considered AF_MULTIPATH instead?

Because MPTCP does not really care about the IP address family, it might
make sense here.


Christoph

> 
> The resulting socket is simply a wrapper around a single regular TCP
> socket, without any of the MPTCP protocol implemented over the wire.
> 
> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
> ---
>  include/net/mptcp.h  |  33 ++++++++++
>  net/Kconfig          |   1 +
>  net/Makefile         |   1 +
>  net/mptcp/Kconfig    |  10 ++++
>  net/mptcp/Makefile   |   3 +
>  net/mptcp/protocol.c | 139 +++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 187 insertions(+)
>  create mode 100644 include/net/mptcp.h
>  create mode 100644 net/mptcp/Kconfig
>  create mode 100644 net/mptcp/Makefile
>  create mode 100644 net/mptcp/protocol.c
> 
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> new file mode 100644
> index 000000000000..7f7b18b000fe
> --- /dev/null
> +++ b/include/net/mptcp.h
> @@ -0,0 +1,33 @@
> +/*
> + * Multipath TCP
> + *
> + * Copyright (c) 2017, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#ifndef __NET_MPTCP_H
> +#define __NET_MPTCP_H
> +
> +#include <linux/tcp.h>
> +
> +/* MPTCP connection sock */
> +struct mptcp_sock {
> +	/* inet_connection_sock must be the first member */
> +	struct	inet_connection_sock sk;
> +	struct	socket *subflow;
> +};
> +
> +static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
> +{
> +	return (struct mptcp_sock *)sk;
> +}
> +
> +#endif /* __NET_MPTCP_H */
> diff --git a/net/Kconfig b/net/Kconfig
> index 228dfa382eec..274282e9b742 100644
> --- a/net/Kconfig
> +++ b/net/Kconfig
> @@ -89,6 +89,7 @@ if INET
>  source "net/ipv4/Kconfig"
>  source "net/ipv6/Kconfig"
>  source "net/netlabel/Kconfig"
> +source "net/mptcp/Kconfig"
>  
>  endif # if INET
>  
> diff --git a/net/Makefile b/net/Makefile
> index bdaf53925acd..1673aab222d8 100644
> --- a/net/Makefile
> +++ b/net/Makefile
> @@ -87,3 +87,4 @@ endif
>  obj-$(CONFIG_QRTR)		+= qrtr/
>  obj-$(CONFIG_NET_NCSI)		+= ncsi/
>  obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
> +obj-$(CONFIG_MPTCP)		+= mptcp/
> diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
> new file mode 100644
> index 000000000000..8e48190e5fed
> --- /dev/null
> +++ b/net/mptcp/Kconfig
> @@ -0,0 +1,10 @@
> +
> +config MPTCP
> +	bool "Multipath TCP"
> +	depends on INET
> +	---help---
> +	  Multipath TCP (MPTCP) connections send and receive data over multiple
> +	  subflows in order to utilize multiple network paths. Each subflow
> +	  uses the TCP protocol, and TCP options carry header information for
> +	  MPTCP.
> +
> diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
> new file mode 100644
> index 000000000000..5624e7d51d48
> --- /dev/null
> +++ b/net/mptcp/Makefile
> @@ -0,0 +1,3 @@
> +obj-$(CONFIG_MPTCP) += mptcp.o
> +
> +mptcp-y := protocol.o
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> new file mode 100644
> index 000000000000..c1eb4afc3ca4
> --- /dev/null
> +++ b/net/mptcp/protocol.c
> @@ -0,0 +1,139 @@
> +/*
> + * Multipath TCP
> + *
> + * Copyright (c) 2017, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/netdevice.h>
> +#include <net/sock.h>
> +#include <net/inet_common.h>
> +#include <net/inet_hashtables.h>
> +#include <net/protocol.h>
> +#include <net/tcp.h>
> +#include <net/mptcp.h>
> +
> +static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	struct socket *subflow = msk->subflow;
> +
> +	pr_debug("subflow=%p", subflow->sk);
> +
> +	return sock_sendmsg(subflow, msg);
> +}
> +
> +static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> +			 int nonblock, int flags, int *addr_len)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	struct socket *subflow = msk->subflow;
> +
> +	pr_debug("subflow=%p", subflow->sk);
> +
> +	return sock_recvmsg(subflow, msg, flags);
> +}
> +
> +static int mptcp_init_sock(struct sock *sk)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	struct socket *sf;
> +	int err;
> +
> +	pr_debug("msk=%p", msk);
> +
> +	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
> +			       &sf);
> +	if (!err) {
> +		pr_debug("subflow=%p", sf->sk);
> +		msk->subflow = sf;
> +	}
> +
> +	return err;
> +}
> +
> +static void mptcp_close(struct sock *sk, long timeout)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +
> +	if (msk->subflow) {
> +		pr_debug("subflow=%p", msk->subflow->sk);
> +		sock_release(msk->subflow);
> +	}
> +}
> +
> +static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	int err;
> +
> +	saddr->sa_family = AF_INET;
> +
> +	pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk);
> +
> +	err = kernel_connect(msk->subflow, saddr, len, 0);
> +
> +	sk->sk_state = TCP_ESTABLISHED;
> +
> +	return err;
> +}
> +
> +static struct proto mptcp_prot = {
> +	.name		= "MPTCP",
> +	.owner		= THIS_MODULE,
> +	.init		= mptcp_init_sock,
> +	.close		= mptcp_close,
> +	.accept		= inet_csk_accept,
> +	.connect	= mptcp_connect,
> +	.shutdown	= tcp_shutdown,
> +	.sendmsg	= mptcp_sendmsg,
> +	.recvmsg	= mptcp_recvmsg,
> +	.hash		= inet_hash,
> +	.unhash		= inet_unhash,
> +	.get_port	= inet_csk_get_port,
> +	.obj_size	= sizeof(struct mptcp_sock),
> +	.no_autobind	= 1,
> +};
> +
> +static struct inet_protosw mptcp_protosw = {
> +	.type		= SOCK_STREAM,
> +	.protocol	= IPPROTO_MPTCP,
> +	.prot		= &mptcp_prot,
> +	.ops		= &inet_stream_ops,
> +};
> +
> +static int __init mptcp_init(void)
> +{
> +	int err;
> +
> +	err = proto_register(&mptcp_prot, 1);
> +	if (err)
> +		return err;
> +
> +	inet_register_protosw(&mptcp_protosw);
> +
> +	return 0;
> +}
> +
> +static void __exit mptcp_exit(void)
> +{
> +	inet_unregister_protosw(&mptcp_protosw);
> +	proto_unregister(&mptcp_prot);
> +}
> +
> +module_init(mptcp_init);
> +module_exit(mptcp_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_MPTCP);
> +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_MPTCP);
> -- 
> 2.19.1
> 
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-10-10  0:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-05 22:59 [MPTCP] [RFC PATCH v3 03/16] mptcp: Add MPTCP socket stubs Mat Martineau
2018-10-08 18:11 Christoph Paasch
2018-10-08 19:48 Mat Martineau
2018-10-08 20:12 Krystad, Peter
2018-10-08 20:47 Christoph Paasch
2018-10-10  0:16 Mat Martineau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.