All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [MPTCP] [RFC PATCH v3 08/16] mptcp: Create SUBFLOW socket for incoming connections
@ 2018-10-08 21:38 Krystad, Peter
  0 siblings, 0 replies; 5+ messages in thread
From: Krystad, Peter @ 2018-10-08 21:38 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 18758 bytes --]

On Mon, 2018-10-08 at 13:31 -0700, cpaasch(a)apple.com wrote:
> On 08/10/18 - 20:22:44, Krystad, Peter wrote:
> > On Mon, 2018-10-08 at 11:22 -0700, Christoph Paasch wrote:
> > > On 05/10/18 - 15:59:10, Mat Martineau wrote:
> > > > From: Peter Krystad <peter.krystad(a)intel.com>
> > > > 
> > > > Add subflow_request_sock type that extends tcp_request_sock
> > > > and add an is_mptcp flag to tcp_request_sock to distinguish them.
> > > > 
> > > > Override the listen() and accept() methods of the MPTCP
> > > > socket proto_ops so they may act on the subflow socket.
> > > > 
> > > > Override the conn_request() and syn_recv_sock() handlers
> > > > in the inet_connection_sock to handle incoming MPTCP
> > > > SYNs and the ACK to the response SYN.
> > > > 
> > > > Add handling in tcp_output.c to add MP_CAPABLE to an outgoing
> > > > SYN-ACK response for a subflow_request_sock.
> > > > 
> > > > Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
> > > > ---
> > > >  include/linux/tcp.h   |   1 +
> > > >  include/net/mptcp.h   |  27 ++++++++++
> > > >  net/ipv4/tcp_input.c  |   1 +
> > > >  net/ipv4/tcp_output.c |  14 +++++
> > > >  net/mptcp/options.c   |  14 +++++
> > > >  net/mptcp/protocol.c  | 102 ++++++++++++++++++++++++++++++++++---
> > > >  net/mptcp/subflow.c   | 115 ++++++++++++++++++++++++++++++++++++++++--
> > > >  7 files changed, 264 insertions(+), 10 deletions(-)
> > > > 
> > > > diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> > > > index 7f0dd688376c..b109798482d3 100644
> > > > --- a/include/linux/tcp.h
> > > > +++ b/include/linux/tcp.h
> > > > @@ -148,6 +148,7 @@ struct tcp_request_sock {
> > > >  						  * FastOpen it's the seq#
> > > >  						  * after data-in-SYN.
> > > >  						  */
> > > > +	bool				is_mptcp;
> > > >  };
> > > >  
> > > >  static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> > > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > > > index 4b08eb4ccc6f..56883a1ee2fe 100644
> > > > --- a/include/net/mptcp.h
> > > > +++ b/include/net/mptcp.h
> > > > @@ -69,11 +69,30 @@ static inline struct subflow_sock *subflow_sk(const struct sock *sk)
> > > >  	return (struct subflow_sock *)sk;
> > > >  }
> > > >  
> > > > +struct subflow_request_sock {
> > > > +	struct	tcp_request_sock sk;
> > > > +	u8	mp_capable : 1,
> > > > +		mp_join : 1,
> > > > +		checksum : 1,
> > > > +		backup : 1,
> > > > +		version : 4;
> > > > +	u64	local_key;
> > > > +	u64	remote_key;
> > > > +};
> > > > +
> > > > +static inline
> > > > +struct subflow_request_sock *subflow_rsk(const struct request_sock *rsk)
> > > > +{
> > > > +	return (struct subflow_request_sock *)rsk;
> > > > +}
> > > > +
> > > >  #ifdef CONFIG_MPTCP
> > > >  
> > > >  void mptcp_parse_option(const unsigned char *ptr, int opsize,
> > > >  			struct tcp_options_received *opt_rx);
> > > >  unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
> > > > +unsigned int mptcp_synack_options(struct request_sock *req,
> > > > +				  u64 *local_key, u64 *remote_key);
> > > >  
> > > >  void mptcp_finish_connect(struct sock *sk, int mp_capable);
> > > >  
> > > > @@ -96,6 +115,14 @@ static inline unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> > > >  {
> > > >  	return 0;
> > > >  }
> > > > +static inline unsigned int mptcp_synack_options(struct request_sock *sk,
> > > > +						u64 *local_key,
> > > > +						u64 *remote_key)
> > > > +{
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +
> > > >  
> > > >  #endif /* CONFIG_MPTCP */
> > > >  #endif /* __NET_MPTCP_H */
> > > > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > > > index 4cb38904bb5f..9a326729637f 100644
> > > > --- a/net/ipv4/tcp_input.c
> > > > +++ b/net/ipv4/tcp_input.c
> > > > @@ -6441,6 +6441,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
> > > >  
> > > >  	tcp_rsk(req)->af_specific = af_ops;
> > > >  	tcp_rsk(req)->ts_off = 0;
> > > > +	tcp_rsk(req)->is_mptcp = 0;
> > > >  
> > > >  	tcp_clear_options(&tmp_opt);
> > > >  	tmp_opt.mss_clamp = af_ops->mss_clamp;
> > > > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > > > index 9919793e293b..780abb11dffd 100644
> > > > --- a/net/ipv4/tcp_output.c
> > > > +++ b/net/ipv4/tcp_output.c
> > > > @@ -767,6 +767,20 @@ static unsigned int tcp_synack_options(const struct sock *sk,
> > > >  			remaining -= need;
> > > >  		}
> > > >  	}
> > > > +	if (tcp_rsk(req)->is_mptcp) {
> > > > +		u64 local_key;
> > > > +		u64 remote_key;
> > > > +		if (mptcp_synack_options(req, &local_key, &remote_key)) {
> > > > +			if (remaining >= TCPOLEN_MPTCP_MPC_SYNACK) {
> > > > +				opts->options |= OPTION_MPTCP;
> > > > +				opts->suboptions |= OPTION_MPTCP_MPC_SYNACK;
> > > > +				opts->sndr_key = local_key;
> > > > +				opts->rcvr_key = remote_key;
> > > > +				remaining -= TCPOLEN_MPTCP_MPC_SYNACK;
> > > > +			}
> > > > +		}
> > > > +	}
> > > > +
> > > >  	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
> > > >  
> > > >  	return MAX_TCP_OPTION_SPACE - remaining;
> > > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > > > index 4b1cbc3b3efe..7e48d1d92aac 100644
> > > > --- a/net/mptcp/options.c
> > > > +++ b/net/mptcp/options.c
> > > > @@ -161,3 +161,17 @@ unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> > > >  	}
> > > >  	return subflow->request_mptcp;
> > > >  }
> > > > +
> > > > +unsigned int mptcp_synack_options(struct request_sock *req, u64 *local_key,
> > > > +				  u64 *remote_key)
> > > > +{
> > > > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > > > +
> > > > +	if (subflow_req->mp_capable) {
> > > > +		*local_key = subflow_req->local_key;
> > > > +		*remote_key = subflow_req->remote_key;
> > > > +		pr_debug("local_key=%llu", *local_key);
> > > > +		pr_debug("remote_key=%llu", *remote_key);
> > > > +	}
> > > > +	return subflow_req->mp_capable;
> > > > +}
> > > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > > > index 1a3412a742ea..9f802f69a528 100644
> > > > --- a/net/mptcp/protocol.c
> > > > +++ b/net/mptcp/protocol.c
> > > > @@ -80,6 +80,45 @@ static void mptcp_close(struct sock *sk, long timeout)
> > > >  	}
> > > >  }
> > > >  
> > > > +static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
> > > > +				 bool kern)
> > > > +{
> > > > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > > > +	struct socket *listener = msk->subflow;
> > > > +	struct socket *new_sock;
> > > > +	struct socket *mp;
> > > > +	struct subflow_sock *subflow;
> > > > +
> > > > +	pr_debug("msk=%p, listener=%p", msk, listener->sk);
> > > > +	*err = kernel_accept(listener, &new_sock, flags);
> > > > +	if (*err < 0)
> > > > +		return NULL;
> > > > +
> > > > +	subflow = subflow_sk(new_sock->sk);
> > > > +	pr_debug("new_sock=%p", subflow);
> > > > +
> > > > +	*err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_MPTCP, &mp);
> > > > +	if (*err < 0) {
> > > > +		kernel_sock_shutdown(new_sock, SHUT_RDWR);
> > > > +		sock_release(new_sock);
> > > > +		return NULL;
> > > > +	}
> > > > +
> > > > +	msk = mptcp_sk(mp->sk);
> > > > +	pr_debug("msk=%p", msk);
> > > > +	subflow->conn = mp->sk;
> > > > +
> > > > +	if (subflow->mp_capable) {
> > > > +		msk->remote_key = subflow->remote_key;
> > > > +		msk->local_key = subflow->local_key;
> > > > +		msk->connection_list = new_sock;
> > > > +	} else {
> > > > +		msk->subflow = new_sock;
> > > > +	}
> > > > +
> > > > +	return mp->sk;
> > > > +}
> > > > +
> > > >  static int mptcp_get_port(struct sock *sk, unsigned short snum)
> > > >  {
> > > >  	struct mptcp_sock *msk = mptcp_sk(sk);
> > > > @@ -129,11 +168,16 @@ static int subflow_create(struct sock *sock)
> > > >  int mptcp_stream_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> > > >  {
> > > >  	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > > > -	struct socket *subflow = msk->subflow;
> > > > +	int err;
> > > >  
> > > > -	pr_debug("msk=%p, subflow=%p", msk, subflow->sk);
> > > > +	pr_debug("msk=%p", msk);
> > > >  
> > > > -	return inet_bind(subflow, uaddr, addr_len);
> > > > +	if (msk->subflow == NULL) {
> > > > +		err = subflow_create(sock->sk);
> > > > +		if (err)
> > > > +			return err;
> > > > +	}
> > > > +	return inet_bind(msk->subflow, uaddr, addr_len);
> > > >  }
> > > >  
> > > >  int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> > > > @@ -153,12 +197,56 @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> > > >  	return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
> > > >  }
> > > >  
> > > > +int mptcp_stream_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
> > > > +{
> > > > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > > > +	struct socket *subflow;
> > > > +	int err = -EPERM;
> > > > +
> > > > +	if (msk->connection_list)
> > > > +		subflow = msk->connection_list;
> > > > +	else
> > > > +		subflow = msk->subflow;
> > > > +
> > > > +	err = inet_getname(subflow, uaddr, peer);
> > > > +
> > > > +	return err;
> > > > +}
> > > > +
> > > > +int mptcp_stream_listen(struct socket *sock, int backlog)
> > > > +{
> > > > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > > > +	int err;
> > > > +
> > > > +	pr_debug("msk=%p", msk);
> > > > +
> > > > +	if (msk->subflow == NULL) {
> > > > +		err = subflow_create(sock->sk);
> > > > +		if (err)
> > > > +			return err;
> > > > +	}
> > > > +	return inet_listen(msk->subflow, backlog);
> > > > +}
> > > > +
> > > > +int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags,
> > > > +			bool kern)
> > > > +{
> > > > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > > > +
> > > > +	pr_debug("msk=%p", msk);
> > > > +
> > > > +	if (msk->subflow == NULL) {
> > > > +		return -EINVAL;
> > > > +	}
> > > > +	return inet_accept(sock, newsock, flags, kern);
> > > > +}
> > > > +
> > > >  static struct proto mptcp_prot = {
> > > >  	.name		= "MPTCP",
> > > >  	.owner		= THIS_MODULE,
> > > >  	.init		= mptcp_init_sock,
> > > >  	.close		= mptcp_close,
> > > > -	.accept		= inet_csk_accept,
> > > > +	.accept		= mptcp_accept,
> > > >  	.shutdown	= tcp_shutdown,
> > > >  	.sendmsg	= mptcp_sendmsg,
> > > >  	.recvmsg	= mptcp_recvmsg,
> > > > @@ -176,11 +264,11 @@ const struct proto_ops mptcp_stream_ops = {
> > > >  	.bind		   = mptcp_stream_bind,
> > > >  	.connect	   = mptcp_stream_connect,
> > > >  	.socketpair	   = sock_no_socketpair,
> > > > -	.accept		   = inet_accept,
> > > > -	.getname	   = inet_getname,
> > > > +	.accept		   = mptcp_stream_accept,
> > > > +	.getname	   = mptcp_stream_getname,
> > > >  	.poll		   = tcp_poll,
> > > >  	.ioctl		   = inet_ioctl,
> > > > -	.listen		   = inet_listen,
> > > > +	.listen		   = mptcp_stream_listen,
> > > >  	.shutdown	   = inet_shutdown,
> > > >  	.setsockopt	   = sock_common_setsockopt,
> > > >  	.getsockopt	   = sock_common_getsockopt,
> > > > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > > > index 5e5fdcb3175f..89fcc3b746eb 100644
> > > > --- a/net/mptcp/subflow.c
> > > > +++ b/net/mptcp/subflow.c
> > > > @@ -53,6 +53,40 @@ static int subflow_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> > > >  	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
> > > >  }
> > > >  
> > > > +static void subflow_v4_init_req(struct request_sock *req,
> > > > +				const struct sock *sk_listener,
> > > > +				struct sk_buff *skb)
> > > > +{
> > > > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > > > +	struct subflow_sock *listener = subflow_sk(sk_listener);
> > > > +	struct tcp_options_received rx_opt;
> > > > +
> > > > +	tcp_rsk(req)->is_mptcp = 1;
> > > > +	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
> > > > +
> > > > +	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
> > > > +
> > > > +	rx_opt.mptcp.flags = 0;
> > > > +	rx_opt.mptcp.mp_capable = 0;
> > > > +	rx_opt.mptcp.mp_join = 0;
> > > > +	rx_opt.mptcp.dss = 0;
> > > > +	mptcp_get_options(skb, &rx_opt);
> > > > +
> > > > +	if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
> > > > +		subflow_req->mp_capable = 1;
> > > > +		if (rx_opt.mptcp.version >= listener->version)
> > > > +			subflow_req->version = listener->version;
> > > > +		else
> > > > +			subflow_req->version = rx_opt.mptcp.version;
> > > > +		if ((rx_opt.mptcp.flags & MPTCP_CAP_CHECKSUM_REQD) ||
> > > > +		    listener->checksum)
> > > > +			subflow_req->checksum = 1;
> > > > +		subflow_req->remote_key = rx_opt.mptcp.sndr_key;
> > > > +	} else {
> > > > +		subflow_req->mp_capable = 0;
> > > > +	}
> > > > +}
> > > > +
> > > >  static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
> > > >  {
> > > >  	struct subflow_sock *subflow = subflow_sk(sk);
> > > > @@ -68,13 +102,66 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
> > > >  	}
> > > >  }
> > > >  
> > > > +static struct request_sock_ops subflow_request_sock_ops;
> > > > +static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
> > > > +
> > > > +static int subflow_conn_request(struct sock *sk, struct sk_buff *skb)
> > > > +{
> > > > +	struct subflow_sock *subflow = subflow_sk(sk);
> > > > +
> > > > +	pr_debug("subflow=%p", subflow);
> > > > +
> > > > +	/* Never answer to SYNs sent to broadcast or multicast */
> > > > +	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
> > > > +		goto drop;
> > > > +
> > > > +	return tcp_conn_request(&subflow_request_sock_ops,
> > > > +				&subflow_request_sock_ipv4_ops,
> > > > +				sk, skb);
> > > > +drop:
> > > > +	tcp_listendrop(sk);
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> > > > +					  struct sk_buff *skb,
> > > > +					  struct request_sock *req,
> > > > +					  struct dst_entry *dst,
> > > > +					  struct request_sock *req_unhash,
> > > > +					  bool *own_req)
> > > > +{
> > > > +	struct subflow_sock *listener = subflow_sk(sk);
> > > > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > > > +	struct sock *child;
> > > > +
> > > > +	pr_debug("listener=%p, req=%p, conn=%p", sk, req, listener->conn);
> > > > +
> > > > +	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
> > > > +
> > > > +	if (child) {
> > > > +		struct subflow_sock *subflow = subflow_sk(child);
> > > > +
> > > > +		pr_debug("child=%p", child);
> > > > +		if (subflow_req->mp_capable) {
> > > > +			subflow->mp_capable = 1;
> > > > +			subflow->fourth_ack = 1;
> > > 
> > > Where is the fourth ack being triggered?
> > > 
> > 
> > For incoming connections this flag is used to prevent sending
> > MP_CAPABLE in any ACKs (see tcp_established_options). The field name is
> > a legacy choice.
> 
> I see.
> 
> > There is not currently a mechanism to send a duplicate
> > ack with first DSS option after the three-way handshake completes.
> 
> Oh - ok that is needed because MPTCP's JOIN-exchange requires a four-way
> handshake. Which is why I understood the "fourth_ack" as the final ACK of
> the four-way handshake and thus started looking for where the call to
> tcp_send_ack() is ;-)
> 
The legacy of the field name is that it was for this purpose
(triggering the fourth ack) but the prototype hasn't got that far yet.

Peter.
 
> Christoph
> 
> > 
> > Peter.
> > 
> > > 
> > > Christoph
> > > 
> > > > +			subflow->remote_key = subflow_req->remote_key;
> > > > +			subflow->local_key = subflow_req->local_key;
> > > > +		} else {
> > > > +			subflow->mp_capable = 0;
> > > > +		}
> > > > +	}
> > > > +
> > > > +	return child;
> > > > +}
> > > > +
> > > >  const struct inet_connection_sock_af_ops subflow_specific = {
> > > >  	.queue_xmit	   = ip_queue_xmit,
> > > >  	.send_check	   = tcp_v4_send_check,
> > > >  	.rebuild_header	   = inet_sk_rebuild_header,
> > > >  	.sk_rx_dst_set	   = subflow_finish_connect,
> > > > -	.conn_request	   = tcp_v4_conn_request,
> > > > -	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
> > > > +	.conn_request	   = subflow_conn_request,
> > > > +	.syn_recv_sock	   = subflow_syn_recv_sock,
> > > >  	.net_header_len	   = sizeof(struct iphdr),
> > > >  	.setsockopt	   = ip_setsockopt,
> > > >  	.getsockopt	   = ip_getsockopt,
> > > > @@ -112,6 +199,21 @@ static void subflow_close(struct sock *sk, long timeout)
> > > >  	tcp_close(sk, timeout);
> > > >  }
> > > >  
> > > > +static struct sock *subflow_accept(struct sock *sk, int flags, int *err,
> > > > +				   bool kern)
> > > > +{
> > > > +	struct subflow_sock *subflow = subflow_sk(sk);
> > > > +	struct sock *child;
> > > > +
> > > > +	pr_debug("subflow=%p, conn=%p", subflow, subflow->conn);
> > > > +
> > > > +	child = inet_csk_accept(sk, flags, err, kern);
> > > > +
> > > > +	pr_debug("child=%p", child);
> > > > +
> > > > +	return child;
> > > > +}
> > > > +
> > > >  static void subflow_destroy(struct sock *sk)
> > > >  {
> > > >  	pr_debug("subflow=%p", sk);
> > > > @@ -125,7 +227,7 @@ static struct proto subflow_prot = {
> > > >  	.close		= subflow_close,
> > > >  	.connect	= subflow_connect,
> > > >  	.disconnect	= tcp_disconnect,
> > > > -	.accept		= inet_csk_accept,
> > > > +	.accept		= subflow_accept,
> > > >  	.ioctl		= tcp_ioctl,
> > > >  	.init		= subflow_init_sock,
> > > >  	.destroy	= subflow_destroy,
> > > > @@ -169,7 +271,14 @@ int mptcp_subflow_init(void)
> > > >  
> > > >  	/* TODO: Register path manager callbacks. */
> > > >  
> > > > +	subflow_request_sock_ops = tcp_request_sock_ops;
> > > > +	subflow_request_sock_ops.obj_size = sizeof(struct subflow_request_sock),
> > > > +
> > > > +	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
> > > > +	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
> > > > +
> > > >  	subflow_prot.twsk_prot		= tcp_prot.twsk_prot;
> > > > +	subflow_prot.rsk_prot		= &subflow_request_sock_ops;
> > > >  	subflow_prot.h.hashinfo		= tcp_prot.h.hashinfo;
> > > >  	err = proto_register(&subflow_prot, 1);
> > > >  	if (err)
> > > > -- 
> > > > 2.19.1
> > > > 
> > > > _______________________________________________
> > > > mptcp mailing list
> > > > mptcp(a)lists.01.org
> > > > https://lists.01.org/mailman/listinfo/mptcp
> > > 
> > > _______________________________________________
> > > mptcp mailing list
> > > mptcp(a)lists.01.org
> > > https://lists.01.org/mailman/listinfo/mptcp

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [MPTCP] [RFC PATCH v3 08/16] mptcp: Create SUBFLOW socket for incoming connections
@ 2018-10-08 20:31 cpaasch
  0 siblings, 0 replies; 5+ messages in thread
From: cpaasch @ 2018-10-08 20:31 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 17566 bytes --]

On 08/10/18 - 20:22:44, Krystad, Peter wrote:
> On Mon, 2018-10-08 at 11:22 -0700, Christoph Paasch wrote:
> > On 05/10/18 - 15:59:10, Mat Martineau wrote:
> > > From: Peter Krystad <peter.krystad(a)intel.com>
> > > 
> > > Add subflow_request_sock type that extends tcp_request_sock
> > > and add an is_mptcp flag to tcp_request_sock to distinguish them.
> > > 
> > > Override the listen() and accept() methods of the MPTCP
> > > socket proto_ops so they may act on the subflow socket.
> > > 
> > > Override the conn_request() and syn_recv_sock() handlers
> > > in the inet_connection_sock to handle incoming MPTCP
> > > SYNs and the ACK to the response SYN.
> > > 
> > > Add handling in tcp_output.c to add MP_CAPABLE to an outgoing
> > > SYN-ACK response for a subflow_request_sock.
> > > 
> > > Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
> > > ---
> > >  include/linux/tcp.h   |   1 +
> > >  include/net/mptcp.h   |  27 ++++++++++
> > >  net/ipv4/tcp_input.c  |   1 +
> > >  net/ipv4/tcp_output.c |  14 +++++
> > >  net/mptcp/options.c   |  14 +++++
> > >  net/mptcp/protocol.c  | 102 ++++++++++++++++++++++++++++++++++---
> > >  net/mptcp/subflow.c   | 115 ++++++++++++++++++++++++++++++++++++++++--
> > >  7 files changed, 264 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> > > index 7f0dd688376c..b109798482d3 100644
> > > --- a/include/linux/tcp.h
> > > +++ b/include/linux/tcp.h
> > > @@ -148,6 +148,7 @@ struct tcp_request_sock {
> > >  						  * FastOpen it's the seq#
> > >  						  * after data-in-SYN.
> > >  						  */
> > > +	bool				is_mptcp;
> > >  };
> > >  
> > >  static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > > index 4b08eb4ccc6f..56883a1ee2fe 100644
> > > --- a/include/net/mptcp.h
> > > +++ b/include/net/mptcp.h
> > > @@ -69,11 +69,30 @@ static inline struct subflow_sock *subflow_sk(const struct sock *sk)
> > >  	return (struct subflow_sock *)sk;
> > >  }
> > >  
> > > +struct subflow_request_sock {
> > > +	struct	tcp_request_sock sk;
> > > +	u8	mp_capable : 1,
> > > +		mp_join : 1,
> > > +		checksum : 1,
> > > +		backup : 1,
> > > +		version : 4;
> > > +	u64	local_key;
> > > +	u64	remote_key;
> > > +};
> > > +
> > > +static inline
> > > +struct subflow_request_sock *subflow_rsk(const struct request_sock *rsk)
> > > +{
> > > +	return (struct subflow_request_sock *)rsk;
> > > +}
> > > +
> > >  #ifdef CONFIG_MPTCP
> > >  
> > >  void mptcp_parse_option(const unsigned char *ptr, int opsize,
> > >  			struct tcp_options_received *opt_rx);
> > >  unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
> > > +unsigned int mptcp_synack_options(struct request_sock *req,
> > > +				  u64 *local_key, u64 *remote_key);
> > >  
> > >  void mptcp_finish_connect(struct sock *sk, int mp_capable);
> > >  
> > > @@ -96,6 +115,14 @@ static inline unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> > >  {
> > >  	return 0;
> > >  }
> > > +static inline unsigned int mptcp_synack_options(struct request_sock *sk,
> > > +						u64 *local_key,
> > > +						u64 *remote_key)
> > > +{
> > > +	return 0;
> > > +}
> > > +
> > > +
> > >  
> > >  #endif /* CONFIG_MPTCP */
> > >  #endif /* __NET_MPTCP_H */
> > > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > > index 4cb38904bb5f..9a326729637f 100644
> > > --- a/net/ipv4/tcp_input.c
> > > +++ b/net/ipv4/tcp_input.c
> > > @@ -6441,6 +6441,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
> > >  
> > >  	tcp_rsk(req)->af_specific = af_ops;
> > >  	tcp_rsk(req)->ts_off = 0;
> > > +	tcp_rsk(req)->is_mptcp = 0;
> > >  
> > >  	tcp_clear_options(&tmp_opt);
> > >  	tmp_opt.mss_clamp = af_ops->mss_clamp;
> > > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > > index 9919793e293b..780abb11dffd 100644
> > > --- a/net/ipv4/tcp_output.c
> > > +++ b/net/ipv4/tcp_output.c
> > > @@ -767,6 +767,20 @@ static unsigned int tcp_synack_options(const struct sock *sk,
> > >  			remaining -= need;
> > >  		}
> > >  	}
> > > +	if (tcp_rsk(req)->is_mptcp) {
> > > +		u64 local_key;
> > > +		u64 remote_key;
> > > +		if (mptcp_synack_options(req, &local_key, &remote_key)) {
> > > +			if (remaining >= TCPOLEN_MPTCP_MPC_SYNACK) {
> > > +				opts->options |= OPTION_MPTCP;
> > > +				opts->suboptions |= OPTION_MPTCP_MPC_SYNACK;
> > > +				opts->sndr_key = local_key;
> > > +				opts->rcvr_key = remote_key;
> > > +				remaining -= TCPOLEN_MPTCP_MPC_SYNACK;
> > > +			}
> > > +		}
> > > +	}
> > > +
> > >  	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
> > >  
> > >  	return MAX_TCP_OPTION_SPACE - remaining;
> > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > > index 4b1cbc3b3efe..7e48d1d92aac 100644
> > > --- a/net/mptcp/options.c
> > > +++ b/net/mptcp/options.c
> > > @@ -161,3 +161,17 @@ unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> > >  	}
> > >  	return subflow->request_mptcp;
> > >  }
> > > +
> > > +unsigned int mptcp_synack_options(struct request_sock *req, u64 *local_key,
> > > +				  u64 *remote_key)
> > > +{
> > > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > > +
> > > +	if (subflow_req->mp_capable) {
> > > +		*local_key = subflow_req->local_key;
> > > +		*remote_key = subflow_req->remote_key;
> > > +		pr_debug("local_key=%llu", *local_key);
> > > +		pr_debug("remote_key=%llu", *remote_key);
> > > +	}
> > > +	return subflow_req->mp_capable;
> > > +}
> > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > > index 1a3412a742ea..9f802f69a528 100644
> > > --- a/net/mptcp/protocol.c
> > > +++ b/net/mptcp/protocol.c
> > > @@ -80,6 +80,45 @@ static void mptcp_close(struct sock *sk, long timeout)
> > >  	}
> > >  }
> > >  
> > > +static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
> > > +				 bool kern)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > > +	struct socket *listener = msk->subflow;
> > > +	struct socket *new_sock;
> > > +	struct socket *mp;
> > > +	struct subflow_sock *subflow;
> > > +
> > > +	pr_debug("msk=%p, listener=%p", msk, listener->sk);
> > > +	*err = kernel_accept(listener, &new_sock, flags);
> > > +	if (*err < 0)
> > > +		return NULL;
> > > +
> > > +	subflow = subflow_sk(new_sock->sk);
> > > +	pr_debug("new_sock=%p", subflow);
> > > +
> > > +	*err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_MPTCP, &mp);
> > > +	if (*err < 0) {
> > > +		kernel_sock_shutdown(new_sock, SHUT_RDWR);
> > > +		sock_release(new_sock);
> > > +		return NULL;
> > > +	}
> > > +
> > > +	msk = mptcp_sk(mp->sk);
> > > +	pr_debug("msk=%p", msk);
> > > +	subflow->conn = mp->sk;
> > > +
> > > +	if (subflow->mp_capable) {
> > > +		msk->remote_key = subflow->remote_key;
> > > +		msk->local_key = subflow->local_key;
> > > +		msk->connection_list = new_sock;
> > > +	} else {
> > > +		msk->subflow = new_sock;
> > > +	}
> > > +
> > > +	return mp->sk;
> > > +}
> > > +
> > >  static int mptcp_get_port(struct sock *sk, unsigned short snum)
> > >  {
> > >  	struct mptcp_sock *msk = mptcp_sk(sk);
> > > @@ -129,11 +168,16 @@ static int subflow_create(struct sock *sock)
> > >  int mptcp_stream_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> > >  {
> > >  	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > > -	struct socket *subflow = msk->subflow;
> > > +	int err;
> > >  
> > > -	pr_debug("msk=%p, subflow=%p", msk, subflow->sk);
> > > +	pr_debug("msk=%p", msk);
> > >  
> > > -	return inet_bind(subflow, uaddr, addr_len);
> > > +	if (msk->subflow == NULL) {
> > > +		err = subflow_create(sock->sk);
> > > +		if (err)
> > > +			return err;
> > > +	}
> > > +	return inet_bind(msk->subflow, uaddr, addr_len);
> > >  }
> > >  
> > >  int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> > > @@ -153,12 +197,56 @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> > >  	return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
> > >  }
> > >  
> > > +int mptcp_stream_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > > +	struct socket *subflow;
> > > +	int err = -EPERM;
> > > +
> > > +	if (msk->connection_list)
> > > +		subflow = msk->connection_list;
> > > +	else
> > > +		subflow = msk->subflow;
> > > +
> > > +	err = inet_getname(subflow, uaddr, peer);
> > > +
> > > +	return err;
> > > +}
> > > +
> > > +int mptcp_stream_listen(struct socket *sock, int backlog)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > > +	int err;
> > > +
> > > +	pr_debug("msk=%p", msk);
> > > +
> > > +	if (msk->subflow == NULL) {
> > > +		err = subflow_create(sock->sk);
> > > +		if (err)
> > > +			return err;
> > > +	}
> > > +	return inet_listen(msk->subflow, backlog);
> > > +}
> > > +
> > > +int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags,
> > > +			bool kern)
> > > +{
> > > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > > +
> > > +	pr_debug("msk=%p", msk);
> > > +
> > > +	if (msk->subflow == NULL) {
> > > +		return -EINVAL;
> > > +	}
> > > +	return inet_accept(sock, newsock, flags, kern);
> > > +}
> > > +
> > >  static struct proto mptcp_prot = {
> > >  	.name		= "MPTCP",
> > >  	.owner		= THIS_MODULE,
> > >  	.init		= mptcp_init_sock,
> > >  	.close		= mptcp_close,
> > > -	.accept		= inet_csk_accept,
> > > +	.accept		= mptcp_accept,
> > >  	.shutdown	= tcp_shutdown,
> > >  	.sendmsg	= mptcp_sendmsg,
> > >  	.recvmsg	= mptcp_recvmsg,
> > > @@ -176,11 +264,11 @@ const struct proto_ops mptcp_stream_ops = {
> > >  	.bind		   = mptcp_stream_bind,
> > >  	.connect	   = mptcp_stream_connect,
> > >  	.socketpair	   = sock_no_socketpair,
> > > -	.accept		   = inet_accept,
> > > -	.getname	   = inet_getname,
> > > +	.accept		   = mptcp_stream_accept,
> > > +	.getname	   = mptcp_stream_getname,
> > >  	.poll		   = tcp_poll,
> > >  	.ioctl		   = inet_ioctl,
> > > -	.listen		   = inet_listen,
> > > +	.listen		   = mptcp_stream_listen,
> > >  	.shutdown	   = inet_shutdown,
> > >  	.setsockopt	   = sock_common_setsockopt,
> > >  	.getsockopt	   = sock_common_getsockopt,
> > > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > > index 5e5fdcb3175f..89fcc3b746eb 100644
> > > --- a/net/mptcp/subflow.c
> > > +++ b/net/mptcp/subflow.c
> > > @@ -53,6 +53,40 @@ static int subflow_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> > >  	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
> > >  }
> > >  
> > > +static void subflow_v4_init_req(struct request_sock *req,
> > > +				const struct sock *sk_listener,
> > > +				struct sk_buff *skb)
> > > +{
> > > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > > +	struct subflow_sock *listener = subflow_sk(sk_listener);
> > > +	struct tcp_options_received rx_opt;
> > > +
> > > +	tcp_rsk(req)->is_mptcp = 1;
> > > +	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
> > > +
> > > +	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
> > > +
> > > +	rx_opt.mptcp.flags = 0;
> > > +	rx_opt.mptcp.mp_capable = 0;
> > > +	rx_opt.mptcp.mp_join = 0;
> > > +	rx_opt.mptcp.dss = 0;
> > > +	mptcp_get_options(skb, &rx_opt);
> > > +
> > > +	if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
> > > +		subflow_req->mp_capable = 1;
> > > +		if (rx_opt.mptcp.version >= listener->version)
> > > +			subflow_req->version = listener->version;
> > > +		else
> > > +			subflow_req->version = rx_opt.mptcp.version;
> > > +		if ((rx_opt.mptcp.flags & MPTCP_CAP_CHECKSUM_REQD) ||
> > > +		    listener->checksum)
> > > +			subflow_req->checksum = 1;
> > > +		subflow_req->remote_key = rx_opt.mptcp.sndr_key;
> > > +	} else {
> > > +		subflow_req->mp_capable = 0;
> > > +	}
> > > +}
> > > +
> > >  static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
> > >  {
> > >  	struct subflow_sock *subflow = subflow_sk(sk);
> > > @@ -68,13 +102,66 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
> > >  	}
> > >  }
> > >  
> > > +static struct request_sock_ops subflow_request_sock_ops;
> > > +static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
> > > +
> > > +static int subflow_conn_request(struct sock *sk, struct sk_buff *skb)
> > > +{
> > > +	struct subflow_sock *subflow = subflow_sk(sk);
> > > +
> > > +	pr_debug("subflow=%p", subflow);
> > > +
> > > +	/* Never answer to SYNs sent to broadcast or multicast */
> > > +	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
> > > +		goto drop;
> > > +
> > > +	return tcp_conn_request(&subflow_request_sock_ops,
> > > +				&subflow_request_sock_ipv4_ops,
> > > +				sk, skb);
> > > +drop:
> > > +	tcp_listendrop(sk);
> > > +	return 0;
> > > +}
> > > +
> > > +static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> > > +					  struct sk_buff *skb,
> > > +					  struct request_sock *req,
> > > +					  struct dst_entry *dst,
> > > +					  struct request_sock *req_unhash,
> > > +					  bool *own_req)
> > > +{
> > > +	struct subflow_sock *listener = subflow_sk(sk);
> > > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > > +	struct sock *child;
> > > +
> > > +	pr_debug("listener=%p, req=%p, conn=%p", sk, req, listener->conn);
> > > +
> > > +	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
> > > +
> > > +	if (child) {
> > > +		struct subflow_sock *subflow = subflow_sk(child);
> > > +
> > > +		pr_debug("child=%p", child);
> > > +		if (subflow_req->mp_capable) {
> > > +			subflow->mp_capable = 1;
> > > +			subflow->fourth_ack = 1;
> > 
> > Where is the fourth ack being triggered?
> > 
> For incoming connections this flag is used to prevent sending
> MP_CAPABLE in any ACKs (see tcp_established_options). The field name is
> a legacy choice.

I see.

> There is not currently a mechanism to send a duplicate
> ack with the first DSS option after the three-way handshake completes.

Oh - ok that is needed because MPTCP's JOIN-exchange requires a four-way
handshake. Which is why I understood the "fourth_ack" as the final ACK of
the four-way handshake and thus started looking for where the call to
tcp_send_ack() is ;-)


Christoph

> 
> Peter.
> 
> > 
> > Christoph
> > 
> > > +			subflow->remote_key = subflow_req->remote_key;
> > > +			subflow->local_key = subflow_req->local_key;
> > > +		} else {
> > > +			subflow->mp_capable = 0;
> > > +		}
> > > +	}
> > > +
> > > +	return child;
> > > +}
> > > +
> > >  const struct inet_connection_sock_af_ops subflow_specific = {
> > >  	.queue_xmit	   = ip_queue_xmit,
> > >  	.send_check	   = tcp_v4_send_check,
> > >  	.rebuild_header	   = inet_sk_rebuild_header,
> > >  	.sk_rx_dst_set	   = subflow_finish_connect,
> > > -	.conn_request	   = tcp_v4_conn_request,
> > > -	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
> > > +	.conn_request	   = subflow_conn_request,
> > > +	.syn_recv_sock	   = subflow_syn_recv_sock,
> > >  	.net_header_len	   = sizeof(struct iphdr),
> > >  	.setsockopt	   = ip_setsockopt,
> > >  	.getsockopt	   = ip_getsockopt,
> > > @@ -112,6 +199,21 @@ static void subflow_close(struct sock *sk, long timeout)
> > >  	tcp_close(sk, timeout);
> > >  }
> > >  
> > > +static struct sock *subflow_accept(struct sock *sk, int flags, int *err,
> > > +				   bool kern)
> > > +{
> > > +	struct subflow_sock *subflow = subflow_sk(sk);
> > > +	struct sock *child;
> > > +
> > > +	pr_debug("subflow=%p, conn=%p", subflow, subflow->conn);
> > > +
> > > +	child = inet_csk_accept(sk, flags, err, kern);
> > > +
> > > +	pr_debug("child=%p", child);
> > > +
> > > +	return child;
> > > +}
> > > +
> > >  static void subflow_destroy(struct sock *sk)
> > >  {
> > >  	pr_debug("subflow=%p", sk);
> > > @@ -125,7 +227,7 @@ static struct proto subflow_prot = {
> > >  	.close		= subflow_close,
> > >  	.connect	= subflow_connect,
> > >  	.disconnect	= tcp_disconnect,
> > > -	.accept		= inet_csk_accept,
> > > +	.accept		= subflow_accept,
> > >  	.ioctl		= tcp_ioctl,
> > >  	.init		= subflow_init_sock,
> > >  	.destroy	= subflow_destroy,
> > > @@ -169,7 +271,14 @@ int mptcp_subflow_init(void)
> > >  
> > >  	/* TODO: Register path manager callbacks. */
> > >  
> > > +	subflow_request_sock_ops = tcp_request_sock_ops;
> > > +	subflow_request_sock_ops.obj_size = sizeof(struct subflow_request_sock),
> > > +
> > > +	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
> > > +	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
> > > +
> > >  	subflow_prot.twsk_prot		= tcp_prot.twsk_prot;
> > > +	subflow_prot.rsk_prot		= &subflow_request_sock_ops;
> > >  	subflow_prot.h.hashinfo		= tcp_prot.h.hashinfo;
> > >  	err = proto_register(&subflow_prot, 1);
> > >  	if (err)
> > > -- 
> > > 2.19.1
> > > 
> > > _______________________________________________
> > > mptcp mailing list
> > > mptcp(a)lists.01.org
> > > https://lists.01.org/mailman/listinfo/mptcp
> > 
> > _______________________________________________
> > mptcp mailing list
> > mptcp(a)lists.01.org
> > https://lists.01.org/mailman/listinfo/mptcp

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [MPTCP] [RFC PATCH v3 08/16] mptcp: Create SUBFLOW socket for incoming connections
@ 2018-10-08 20:22 Krystad, Peter
  0 siblings, 0 replies; 5+ messages in thread
From: Krystad, Peter @ 2018-10-08 20:22 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 16286 bytes --]

On Mon, 2018-10-08 at 11:22 -0700, Christoph Paasch wrote:
> On 05/10/18 - 15:59:10, Mat Martineau wrote:
> > From: Peter Krystad <peter.krystad(a)intel.com>
> > 
> > Add subflow_request_sock type that extends tcp_request_sock
> > and add an is_mptcp flag to tcp_request_sock to distinguish them.
> > 
> > Override the listen() and accept() methods of the MPTCP
> > socket proto_ops so they may act on the subflow socket.
> > 
> > Override the conn_request() and syn_recv_sock() handlers
> > in the inet_connection_sock to handle incoming MPTCP
> > SYNs and the ACK to the response SYN.
> > 
> > Add handling in tcp_output.c to add MP_CAPABLE to an outgoing
> > SYN-ACK response for a subflow_request_sock.
> > 
> > Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
> > ---
> >  include/linux/tcp.h   |   1 +
> >  include/net/mptcp.h   |  27 ++++++++++
> >  net/ipv4/tcp_input.c  |   1 +
> >  net/ipv4/tcp_output.c |  14 +++++
> >  net/mptcp/options.c   |  14 +++++
> >  net/mptcp/protocol.c  | 102 ++++++++++++++++++++++++++++++++++---
> >  net/mptcp/subflow.c   | 115 ++++++++++++++++++++++++++++++++++++++++--
> >  7 files changed, 264 insertions(+), 10 deletions(-)
> > 
> > diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> > index 7f0dd688376c..b109798482d3 100644
> > --- a/include/linux/tcp.h
> > +++ b/include/linux/tcp.h
> > @@ -148,6 +148,7 @@ struct tcp_request_sock {
> >  						  * FastOpen it's the seq#
> >  						  * after data-in-SYN.
> >  						  */
> > +	bool				is_mptcp;
> >  };
> >  
> >  static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > index 4b08eb4ccc6f..56883a1ee2fe 100644
> > --- a/include/net/mptcp.h
> > +++ b/include/net/mptcp.h
> > @@ -69,11 +69,30 @@ static inline struct subflow_sock *subflow_sk(const struct sock *sk)
> >  	return (struct subflow_sock *)sk;
> >  }
> >  
> > +struct subflow_request_sock {
> > +	struct	tcp_request_sock sk;
> > +	u8	mp_capable : 1,
> > +		mp_join : 1,
> > +		checksum : 1,
> > +		backup : 1,
> > +		version : 4;
> > +	u64	local_key;
> > +	u64	remote_key;
> > +};
> > +
> > +static inline
> > +struct subflow_request_sock *subflow_rsk(const struct request_sock *rsk)
> > +{
> > +	return (struct subflow_request_sock *)rsk;
> > +}
> > +
> >  #ifdef CONFIG_MPTCP
> >  
> >  void mptcp_parse_option(const unsigned char *ptr, int opsize,
> >  			struct tcp_options_received *opt_rx);
> >  unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
> > +unsigned int mptcp_synack_options(struct request_sock *req,
> > +				  u64 *local_key, u64 *remote_key);
> >  
> >  void mptcp_finish_connect(struct sock *sk, int mp_capable);
> >  
> > @@ -96,6 +115,14 @@ static inline unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> >  {
> >  	return 0;
> >  }
> > +static inline unsigned int mptcp_synack_options(struct request_sock *sk,
> > +						u64 *local_key,
> > +						u64 *remote_key)
> > +{
> > +	return 0;
> > +}
> > +
> > +
> >  
> >  #endif /* CONFIG_MPTCP */
> >  #endif /* __NET_MPTCP_H */
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 4cb38904bb5f..9a326729637f 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -6441,6 +6441,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
> >  
> >  	tcp_rsk(req)->af_specific = af_ops;
> >  	tcp_rsk(req)->ts_off = 0;
> > +	tcp_rsk(req)->is_mptcp = 0;
> >  
> >  	tcp_clear_options(&tmp_opt);
> >  	tmp_opt.mss_clamp = af_ops->mss_clamp;
> > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > index 9919793e293b..780abb11dffd 100644
> > --- a/net/ipv4/tcp_output.c
> > +++ b/net/ipv4/tcp_output.c
> > @@ -767,6 +767,20 @@ static unsigned int tcp_synack_options(const struct sock *sk,
> >  			remaining -= need;
> >  		}
> >  	}
> > +	if (tcp_rsk(req)->is_mptcp) {
> > +		u64 local_key;
> > +		u64 remote_key;
> > +		if (mptcp_synack_options(req, &local_key, &remote_key)) {
> > +			if (remaining >= TCPOLEN_MPTCP_MPC_SYNACK) {
> > +				opts->options |= OPTION_MPTCP;
> > +				opts->suboptions |= OPTION_MPTCP_MPC_SYNACK;
> > +				opts->sndr_key = local_key;
> > +				opts->rcvr_key = remote_key;
> > +				remaining -= TCPOLEN_MPTCP_MPC_SYNACK;
> > +			}
> > +		}
> > +	}
> > +
> >  	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
> >  
> >  	return MAX_TCP_OPTION_SPACE - remaining;
> > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > index 4b1cbc3b3efe..7e48d1d92aac 100644
> > --- a/net/mptcp/options.c
> > +++ b/net/mptcp/options.c
> > @@ -161,3 +161,17 @@ unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> >  	}
> >  	return subflow->request_mptcp;
> >  }
> > +
> > +unsigned int mptcp_synack_options(struct request_sock *req, u64 *local_key,
> > +				  u64 *remote_key)
> > +{
> > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > +
> > +	if (subflow_req->mp_capable) {
> > +		*local_key = subflow_req->local_key;
> > +		*remote_key = subflow_req->remote_key;
> > +		pr_debug("local_key=%llu", *local_key);
> > +		pr_debug("remote_key=%llu", *remote_key);
> > +	}
> > +	return subflow_req->mp_capable;
> > +}
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 1a3412a742ea..9f802f69a528 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -80,6 +80,45 @@ static void mptcp_close(struct sock *sk, long timeout)
> >  	}
> >  }
> >  
> > +static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
> > +				 bool kern)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	struct socket *listener = msk->subflow;
> > +	struct socket *new_sock;
> > +	struct socket *mp;
> > +	struct subflow_sock *subflow;
> > +
> > +	pr_debug("msk=%p, listener=%p", msk, listener->sk);
> > +	*err = kernel_accept(listener, &new_sock, flags);
> > +	if (*err < 0)
> > +		return NULL;
> > +
> > +	subflow = subflow_sk(new_sock->sk);
> > +	pr_debug("new_sock=%p", subflow);
> > +
> > +	*err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_MPTCP, &mp);
> > +	if (*err < 0) {
> > +		kernel_sock_shutdown(new_sock, SHUT_RDWR);
> > +		sock_release(new_sock);
> > +		return NULL;
> > +	}
> > +
> > +	msk = mptcp_sk(mp->sk);
> > +	pr_debug("msk=%p", msk);
> > +	subflow->conn = mp->sk;
> > +
> > +	if (subflow->mp_capable) {
> > +		msk->remote_key = subflow->remote_key;
> > +		msk->local_key = subflow->local_key;
> > +		msk->connection_list = new_sock;
> > +	} else {
> > +		msk->subflow = new_sock;
> > +	}
> > +
> > +	return mp->sk;
> > +}
> > +
> >  static int mptcp_get_port(struct sock *sk, unsigned short snum)
> >  {
> >  	struct mptcp_sock *msk = mptcp_sk(sk);
> > @@ -129,11 +168,16 @@ static int subflow_create(struct sock *sock)
> >  int mptcp_stream_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> >  {
> >  	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > -	struct socket *subflow = msk->subflow;
> > +	int err;
> >  
> > -	pr_debug("msk=%p, subflow=%p", msk, subflow->sk);
> > +	pr_debug("msk=%p", msk);
> >  
> > -	return inet_bind(subflow, uaddr, addr_len);
> > +	if (msk->subflow == NULL) {
> > +		err = subflow_create(sock->sk);
> > +		if (err)
> > +			return err;
> > +	}
> > +	return inet_bind(msk->subflow, uaddr, addr_len);
> >  }
> >  
> >  int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> > @@ -153,12 +197,56 @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> >  	return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
> >  }
> >  
> > +int mptcp_stream_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > +	struct socket *subflow;
> > +	int err = -EPERM;
> > +
> > +	if (msk->connection_list)
> > +		subflow = msk->connection_list;
> > +	else
> > +		subflow = msk->subflow;
> > +
> > +	err = inet_getname(subflow, uaddr, peer);
> > +
> > +	return err;
> > +}
> > +
> > +int mptcp_stream_listen(struct socket *sock, int backlog)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > +	int err;
> > +
> > +	pr_debug("msk=%p", msk);
> > +
> > +	if (msk->subflow == NULL) {
> > +		err = subflow_create(sock->sk);
> > +		if (err)
> > +			return err;
> > +	}
> > +	return inet_listen(msk->subflow, backlog);
> > +}
> > +
> > +int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags,
> > +			bool kern)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > +
> > +	pr_debug("msk=%p", msk);
> > +
> > +	if (msk->subflow == NULL) {
> > +		return -EINVAL;
> > +	}
> > +	return inet_accept(sock, newsock, flags, kern);
> > +}
> > +
> >  static struct proto mptcp_prot = {
> >  	.name		= "MPTCP",
> >  	.owner		= THIS_MODULE,
> >  	.init		= mptcp_init_sock,
> >  	.close		= mptcp_close,
> > -	.accept		= inet_csk_accept,
> > +	.accept		= mptcp_accept,
> >  	.shutdown	= tcp_shutdown,
> >  	.sendmsg	= mptcp_sendmsg,
> >  	.recvmsg	= mptcp_recvmsg,
> > @@ -176,11 +264,11 @@ const struct proto_ops mptcp_stream_ops = {
> >  	.bind		   = mptcp_stream_bind,
> >  	.connect	   = mptcp_stream_connect,
> >  	.socketpair	   = sock_no_socketpair,
> > -	.accept		   = inet_accept,
> > -	.getname	   = inet_getname,
> > +	.accept		   = mptcp_stream_accept,
> > +	.getname	   = mptcp_stream_getname,
> >  	.poll		   = tcp_poll,
> >  	.ioctl		   = inet_ioctl,
> > -	.listen		   = inet_listen,
> > +	.listen		   = mptcp_stream_listen,
> >  	.shutdown	   = inet_shutdown,
> >  	.setsockopt	   = sock_common_setsockopt,
> >  	.getsockopt	   = sock_common_getsockopt,
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index 5e5fdcb3175f..89fcc3b746eb 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -53,6 +53,40 @@ static int subflow_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> >  	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
> >  }
> >  
> > +static void subflow_v4_init_req(struct request_sock *req,
> > +				const struct sock *sk_listener,
> > +				struct sk_buff *skb)
> > +{
> > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > +	struct subflow_sock *listener = subflow_sk(sk_listener);
> > +	struct tcp_options_received rx_opt;
> > +
> > +	tcp_rsk(req)->is_mptcp = 1;
> > +	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
> > +
> > +	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
> > +
> > +	rx_opt.mptcp.flags = 0;
> > +	rx_opt.mptcp.mp_capable = 0;
> > +	rx_opt.mptcp.mp_join = 0;
> > +	rx_opt.mptcp.dss = 0;
> > +	mptcp_get_options(skb, &rx_opt);
> > +
> > +	if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
> > +		subflow_req->mp_capable = 1;
> > +		if (rx_opt.mptcp.version >= listener->version)
> > +			subflow_req->version = listener->version;
> > +		else
> > +			subflow_req->version = rx_opt.mptcp.version;
> > +		if ((rx_opt.mptcp.flags & MPTCP_CAP_CHECKSUM_REQD) ||
> > +		    listener->checksum)
> > +			subflow_req->checksum = 1;
> > +		subflow_req->remote_key = rx_opt.mptcp.sndr_key;
> > +	} else {
> > +		subflow_req->mp_capable = 0;
> > +	}
> > +}
> > +
> >  static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
> >  {
> >  	struct subflow_sock *subflow = subflow_sk(sk);
> > @@ -68,13 +102,66 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
> >  	}
> >  }
> >  
> > +static struct request_sock_ops subflow_request_sock_ops;
> > +static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
> > +
> > +static int subflow_conn_request(struct sock *sk, struct sk_buff *skb)
> > +{
> > +	struct subflow_sock *subflow = subflow_sk(sk);
> > +
> > +	pr_debug("subflow=%p", subflow);
> > +
> > +	/* Never answer to SYNs sent to broadcast or multicast */
> > +	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
> > +		goto drop;
> > +
> > +	return tcp_conn_request(&subflow_request_sock_ops,
> > +				&subflow_request_sock_ipv4_ops,
> > +				sk, skb);
> > +drop:
> > +	tcp_listendrop(sk);
> > +	return 0;
> > +}
> > +
> > +static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> > +					  struct sk_buff *skb,
> > +					  struct request_sock *req,
> > +					  struct dst_entry *dst,
> > +					  struct request_sock *req_unhash,
> > +					  bool *own_req)
> > +{
> > +	struct subflow_sock *listener = subflow_sk(sk);
> > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > +	struct sock *child;
> > +
> > +	pr_debug("listener=%p, req=%p, conn=%p", sk, req, listener->conn);
> > +
> > +	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
> > +
> > +	if (child) {
> > +		struct subflow_sock *subflow = subflow_sk(child);
> > +
> > +		pr_debug("child=%p", child);
> > +		if (subflow_req->mp_capable) {
> > +			subflow->mp_capable = 1;
> > +			subflow->fourth_ack = 1;
> 
> Where is the fourth ack being triggered?
> 
For incoming connections this flag is used to prevent sending
MP_CAPABLE in any ACKs (see tcp_established_options). The field name is
a legacy choice. There is not currently a mechanism to send a duplicate
ack with the first DSS option after the three-way handshake completes.

Peter.

> 
> Christoph
> 
> > +			subflow->remote_key = subflow_req->remote_key;
> > +			subflow->local_key = subflow_req->local_key;
> > +		} else {
> > +			subflow->mp_capable = 0;
> > +		}
> > +	}
> > +
> > +	return child;
> > +}
> > +
> >  const struct inet_connection_sock_af_ops subflow_specific = {
> >  	.queue_xmit	   = ip_queue_xmit,
> >  	.send_check	   = tcp_v4_send_check,
> >  	.rebuild_header	   = inet_sk_rebuild_header,
> >  	.sk_rx_dst_set	   = subflow_finish_connect,
> > -	.conn_request	   = tcp_v4_conn_request,
> > -	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
> > +	.conn_request	   = subflow_conn_request,
> > +	.syn_recv_sock	   = subflow_syn_recv_sock,
> >  	.net_header_len	   = sizeof(struct iphdr),
> >  	.setsockopt	   = ip_setsockopt,
> >  	.getsockopt	   = ip_getsockopt,
> > @@ -112,6 +199,21 @@ static void subflow_close(struct sock *sk, long timeout)
> >  	tcp_close(sk, timeout);
> >  }
> >  
> > +static struct sock *subflow_accept(struct sock *sk, int flags, int *err,
> > +				   bool kern)
> > +{
> > +	struct subflow_sock *subflow = subflow_sk(sk);
> > +	struct sock *child;
> > +
> > +	pr_debug("subflow=%p, conn=%p", subflow, subflow->conn);
> > +
> > +	child = inet_csk_accept(sk, flags, err, kern);
> > +
> > +	pr_debug("child=%p", child);
> > +
> > +	return child;
> > +}
> > +
> >  static void subflow_destroy(struct sock *sk)
> >  {
> >  	pr_debug("subflow=%p", sk);
> > @@ -125,7 +227,7 @@ static struct proto subflow_prot = {
> >  	.close		= subflow_close,
> >  	.connect	= subflow_connect,
> >  	.disconnect	= tcp_disconnect,
> > -	.accept		= inet_csk_accept,
> > +	.accept		= subflow_accept,
> >  	.ioctl		= tcp_ioctl,
> >  	.init		= subflow_init_sock,
> >  	.destroy	= subflow_destroy,
> > @@ -169,7 +271,14 @@ int mptcp_subflow_init(void)
> >  
> >  	/* TODO: Register path manager callbacks. */
> >  
> > +	subflow_request_sock_ops = tcp_request_sock_ops;
> > +	subflow_request_sock_ops.obj_size = sizeof(struct subflow_request_sock),
> > +
> > +	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
> > +	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
> > +
> >  	subflow_prot.twsk_prot		= tcp_prot.twsk_prot;
> > +	subflow_prot.rsk_prot		= &subflow_request_sock_ops;
> >  	subflow_prot.h.hashinfo		= tcp_prot.h.hashinfo;
> >  	err = proto_register(&subflow_prot, 1);
> >  	if (err)
> > -- 
> > 2.19.1
> > 
> > _______________________________________________
> > mptcp mailing list
> > mptcp(a)lists.01.org
> > https://lists.01.org/mailman/listinfo/mptcp
> 
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [MPTCP] [RFC PATCH v3 08/16] mptcp: Create SUBFLOW socket for incoming connections
@ 2018-10-08 18:22 Christoph Paasch
  0 siblings, 0 replies; 5+ messages in thread
From: Christoph Paasch @ 2018-10-08 18:22 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 14863 bytes --]

On 05/10/18 - 15:59:10, Mat Martineau wrote:
> From: Peter Krystad <peter.krystad(a)intel.com>
> 
> Add subflow_request_sock type that extends tcp_request_sock
> and add an is_mptcp flag to tcp_request_sock to distinguish them.
> 
> Override the listen() and accept() methods of the MPTCP
> socket proto_ops so they may act on the subflow socket.
> 
> Override the conn_request() and syn_recv_sock() handlers
> in the inet_connection_sock to handle incoming MPTCP
> SYNs and the ACK to the response SYN.
> 
> Add handling in tcp_output.c to add MP_CAPABLE to an outgoing
> SYN-ACK response for a subflow_request_sock.
> 
> Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
> ---
>  include/linux/tcp.h   |   1 +
>  include/net/mptcp.h   |  27 ++++++++++
>  net/ipv4/tcp_input.c  |   1 +
>  net/ipv4/tcp_output.c |  14 +++++
>  net/mptcp/options.c   |  14 +++++
>  net/mptcp/protocol.c  | 102 ++++++++++++++++++++++++++++++++++---
>  net/mptcp/subflow.c   | 115 ++++++++++++++++++++++++++++++++++++++++--
>  7 files changed, 264 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 7f0dd688376c..b109798482d3 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -148,6 +148,7 @@ struct tcp_request_sock {
>  						  * FastOpen it's the seq#
>  						  * after data-in-SYN.
>  						  */
> +	bool				is_mptcp;
>  };
>  
>  static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index 4b08eb4ccc6f..56883a1ee2fe 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -69,11 +69,30 @@ static inline struct subflow_sock *subflow_sk(const struct sock *sk)
>  	return (struct subflow_sock *)sk;
>  }
>  
> +struct subflow_request_sock {
> +	struct	tcp_request_sock sk;
> +	u8	mp_capable : 1,
> +		mp_join : 1,
> +		checksum : 1,
> +		backup : 1,
> +		version : 4;
> +	u64	local_key;
> +	u64	remote_key;
> +};
> +
> +static inline
> +struct subflow_request_sock *subflow_rsk(const struct request_sock *rsk)
> +{
> +	return (struct subflow_request_sock *)rsk;
> +}
> +
>  #ifdef CONFIG_MPTCP
>  
>  void mptcp_parse_option(const unsigned char *ptr, int opsize,
>  			struct tcp_options_received *opt_rx);
>  unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
> +unsigned int mptcp_synack_options(struct request_sock *req,
> +				  u64 *local_key, u64 *remote_key);
>  
>  void mptcp_finish_connect(struct sock *sk, int mp_capable);
>  
> @@ -96,6 +115,14 @@ static inline unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
>  {
>  	return 0;
>  }
> +static inline unsigned int mptcp_synack_options(struct request_sock *sk,
> +						u64 *local_key,
> +						u64 *remote_key)
> +{
> +	return 0;
> +}
> +
> +
>  
>  #endif /* CONFIG_MPTCP */
>  #endif /* __NET_MPTCP_H */
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 4cb38904bb5f..9a326729637f 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -6441,6 +6441,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>  
>  	tcp_rsk(req)->af_specific = af_ops;
>  	tcp_rsk(req)->ts_off = 0;
> +	tcp_rsk(req)->is_mptcp = 0;
>  
>  	tcp_clear_options(&tmp_opt);
>  	tmp_opt.mss_clamp = af_ops->mss_clamp;
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 9919793e293b..780abb11dffd 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -767,6 +767,20 @@ static unsigned int tcp_synack_options(const struct sock *sk,
>  			remaining -= need;
>  		}
>  	}
> +	if (tcp_rsk(req)->is_mptcp) {
> +		u64 local_key;
> +		u64 remote_key;
> +		if (mptcp_synack_options(req, &local_key, &remote_key)) {
> +			if (remaining >= TCPOLEN_MPTCP_MPC_SYNACK) {
> +				opts->options |= OPTION_MPTCP;
> +				opts->suboptions |= OPTION_MPTCP_MPC_SYNACK;
> +				opts->sndr_key = local_key;
> +				opts->rcvr_key = remote_key;
> +				remaining -= TCPOLEN_MPTCP_MPC_SYNACK;
> +			}
> +		}
> +	}
> +
>  	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
>  
>  	return MAX_TCP_OPTION_SPACE - remaining;
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index 4b1cbc3b3efe..7e48d1d92aac 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -161,3 +161,17 @@ unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
>  	}
>  	return subflow->request_mptcp;
>  }
> +
> +unsigned int mptcp_synack_options(struct request_sock *req, u64 *local_key,
> +				  u64 *remote_key)
> +{
> +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> +
> +	if (subflow_req->mp_capable) {
> +		*local_key = subflow_req->local_key;
> +		*remote_key = subflow_req->remote_key;
> +		pr_debug("local_key=%llu", *local_key);
> +		pr_debug("remote_key=%llu", *remote_key);
> +	}
> +	return subflow_req->mp_capable;
> +}
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 1a3412a742ea..9f802f69a528 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -80,6 +80,45 @@ static void mptcp_close(struct sock *sk, long timeout)
>  	}
>  }
>  
> +static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
> +				 bool kern)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	struct socket *listener = msk->subflow;
> +	struct socket *new_sock;
> +	struct socket *mp;
> +	struct subflow_sock *subflow;
> +
> +	pr_debug("msk=%p, listener=%p", msk, listener->sk);
> +	*err = kernel_accept(listener, &new_sock, flags);
> +	if (*err < 0)
> +		return NULL;
> +
> +	subflow = subflow_sk(new_sock->sk);
> +	pr_debug("new_sock=%p", subflow);
> +
> +	*err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_MPTCP, &mp);
> +	if (*err < 0) {
> +		kernel_sock_shutdown(new_sock, SHUT_RDWR);
> +		sock_release(new_sock);
> +		return NULL;
> +	}
> +
> +	msk = mptcp_sk(mp->sk);
> +	pr_debug("msk=%p", msk);
> +	subflow->conn = mp->sk;
> +
> +	if (subflow->mp_capable) {
> +		msk->remote_key = subflow->remote_key;
> +		msk->local_key = subflow->local_key;
> +		msk->connection_list = new_sock;
> +	} else {
> +		msk->subflow = new_sock;
> +	}
> +
> +	return mp->sk;
> +}
> +
>  static int mptcp_get_port(struct sock *sk, unsigned short snum)
>  {
>  	struct mptcp_sock *msk = mptcp_sk(sk);
> @@ -129,11 +168,16 @@ static int subflow_create(struct sock *sock)
>  int mptcp_stream_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
>  {
>  	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> -	struct socket *subflow = msk->subflow;
> +	int err;
>  
> -	pr_debug("msk=%p, subflow=%p", msk, subflow->sk);
> +	pr_debug("msk=%p", msk);
>  
> -	return inet_bind(subflow, uaddr, addr_len);
> +	if (msk->subflow == NULL) {
> +		err = subflow_create(sock->sk);
> +		if (err)
> +			return err;
> +	}
> +	return inet_bind(msk->subflow, uaddr, addr_len);
>  }
>  
>  int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> @@ -153,12 +197,56 @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>  	return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
>  }
>  
> +int mptcp_stream_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> +	struct socket *subflow;
> +	int err = -EPERM;
> +
> +	if (msk->connection_list)
> +		subflow = msk->connection_list;
> +	else
> +		subflow = msk->subflow;
> +
> +	err = inet_getname(subflow, uaddr, peer);
> +
> +	return err;
> +}
> +
> +int mptcp_stream_listen(struct socket *sock, int backlog)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> +	int err;
> +
> +	pr_debug("msk=%p", msk);
> +
> +	if (msk->subflow == NULL) {
> +		err = subflow_create(sock->sk);
> +		if (err)
> +			return err;
> +	}
> +	return inet_listen(msk->subflow, backlog);
> +}
> +
> +int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags,
> +			bool kern)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> +
> +	pr_debug("msk=%p", msk);
> +
> +	if (msk->subflow == NULL) {
> +		return -EINVAL;
> +	}
> +	return inet_accept(sock, newsock, flags, kern);
> +}
> +
>  static struct proto mptcp_prot = {
>  	.name		= "MPTCP",
>  	.owner		= THIS_MODULE,
>  	.init		= mptcp_init_sock,
>  	.close		= mptcp_close,
> -	.accept		= inet_csk_accept,
> +	.accept		= mptcp_accept,
>  	.shutdown	= tcp_shutdown,
>  	.sendmsg	= mptcp_sendmsg,
>  	.recvmsg	= mptcp_recvmsg,
> @@ -176,11 +264,11 @@ const struct proto_ops mptcp_stream_ops = {
>  	.bind		   = mptcp_stream_bind,
>  	.connect	   = mptcp_stream_connect,
>  	.socketpair	   = sock_no_socketpair,
> -	.accept		   = inet_accept,
> -	.getname	   = inet_getname,
> +	.accept		   = mptcp_stream_accept,
> +	.getname	   = mptcp_stream_getname,
>  	.poll		   = tcp_poll,
>  	.ioctl		   = inet_ioctl,
> -	.listen		   = inet_listen,
> +	.listen		   = mptcp_stream_listen,
>  	.shutdown	   = inet_shutdown,
>  	.setsockopt	   = sock_common_setsockopt,
>  	.getsockopt	   = sock_common_getsockopt,
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 5e5fdcb3175f..89fcc3b746eb 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -53,6 +53,40 @@ static int subflow_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>  	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
>  }
>  
> +static void subflow_v4_init_req(struct request_sock *req,
> +				const struct sock *sk_listener,
> +				struct sk_buff *skb)
> +{
> +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> +	struct subflow_sock *listener = subflow_sk(sk_listener);
> +	struct tcp_options_received rx_opt;
> +
> +	tcp_rsk(req)->is_mptcp = 1;
> +	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
> +
> +	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
> +
> +	rx_opt.mptcp.flags = 0;
> +	rx_opt.mptcp.mp_capable = 0;
> +	rx_opt.mptcp.mp_join = 0;
> +	rx_opt.mptcp.dss = 0;
> +	mptcp_get_options(skb, &rx_opt);
> +
> +	if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
> +		subflow_req->mp_capable = 1;
> +		if (rx_opt.mptcp.version >= listener->version)
> +			subflow_req->version = listener->version;
> +		else
> +			subflow_req->version = rx_opt.mptcp.version;
> +		if ((rx_opt.mptcp.flags & MPTCP_CAP_CHECKSUM_REQD) ||
> +		    listener->checksum)
> +			subflow_req->checksum = 1;
> +		subflow_req->remote_key = rx_opt.mptcp.sndr_key;
> +	} else {
> +		subflow_req->mp_capable = 0;
> +	}
> +}
> +
>  static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
>  {
>  	struct subflow_sock *subflow = subflow_sk(sk);
> @@ -68,13 +102,66 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
>  	}
>  }
>  
> +static struct request_sock_ops subflow_request_sock_ops;
> +static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
> +
> +static int subflow_conn_request(struct sock *sk, struct sk_buff *skb)
> +{
> +	struct subflow_sock *subflow = subflow_sk(sk);
> +
> +	pr_debug("subflow=%p", subflow);
> +
> +	/* Never answer to SYNs sent to broadcast or multicast */
> +	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
> +		goto drop;
> +
> +	return tcp_conn_request(&subflow_request_sock_ops,
> +				&subflow_request_sock_ipv4_ops,
> +				sk, skb);
> +drop:
> +	tcp_listendrop(sk);
> +	return 0;
> +}
> +
> +static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> +					  struct sk_buff *skb,
> +					  struct request_sock *req,
> +					  struct dst_entry *dst,
> +					  struct request_sock *req_unhash,
> +					  bool *own_req)
> +{
> +	struct subflow_sock *listener = subflow_sk(sk);
> +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> +	struct sock *child;
> +
> +	pr_debug("listener=%p, req=%p, conn=%p", sk, req, listener->conn);
> +
> +	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
> +
> +	if (child) {
> +		struct subflow_sock *subflow = subflow_sk(child);
> +
> +		pr_debug("child=%p", child);
> +		if (subflow_req->mp_capable) {
> +			subflow->mp_capable = 1;
> +			subflow->fourth_ack = 1;

Where is the fourth ack being triggered?


Christoph

> +			subflow->remote_key = subflow_req->remote_key;
> +			subflow->local_key = subflow_req->local_key;
> +		} else {
> +			subflow->mp_capable = 0;
> +		}
> +	}
> +
> +	return child;
> +}
> +
>  const struct inet_connection_sock_af_ops subflow_specific = {
>  	.queue_xmit	   = ip_queue_xmit,
>  	.send_check	   = tcp_v4_send_check,
>  	.rebuild_header	   = inet_sk_rebuild_header,
>  	.sk_rx_dst_set	   = subflow_finish_connect,
> -	.conn_request	   = tcp_v4_conn_request,
> -	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
> +	.conn_request	   = subflow_conn_request,
> +	.syn_recv_sock	   = subflow_syn_recv_sock,
>  	.net_header_len	   = sizeof(struct iphdr),
>  	.setsockopt	   = ip_setsockopt,
>  	.getsockopt	   = ip_getsockopt,
> @@ -112,6 +199,21 @@ static void subflow_close(struct sock *sk, long timeout)
>  	tcp_close(sk, timeout);
>  }
>  
> +static struct sock *subflow_accept(struct sock *sk, int flags, int *err,
> +				   bool kern)
> +{
> +	struct subflow_sock *subflow = subflow_sk(sk);
> +	struct sock *child;
> +
> +	pr_debug("subflow=%p, conn=%p", subflow, subflow->conn);
> +
> +	child = inet_csk_accept(sk, flags, err, kern);
> +
> +	pr_debug("child=%p", child);
> +
> +	return child;
> +}
> +
>  static void subflow_destroy(struct sock *sk)
>  {
>  	pr_debug("subflow=%p", sk);
> @@ -125,7 +227,7 @@ static struct proto subflow_prot = {
>  	.close		= subflow_close,
>  	.connect	= subflow_connect,
>  	.disconnect	= tcp_disconnect,
> -	.accept		= inet_csk_accept,
> +	.accept		= subflow_accept,
>  	.ioctl		= tcp_ioctl,
>  	.init		= subflow_init_sock,
>  	.destroy	= subflow_destroy,
> @@ -169,7 +271,14 @@ int mptcp_subflow_init(void)
>  
>  	/* TODO: Register path manager callbacks. */
>  
> +	subflow_request_sock_ops = tcp_request_sock_ops;
> +	subflow_request_sock_ops.obj_size = sizeof(struct subflow_request_sock),
> +
> +	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
> +	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
> +
>  	subflow_prot.twsk_prot		= tcp_prot.twsk_prot;
> +	subflow_prot.rsk_prot		= &subflow_request_sock_ops;
>  	subflow_prot.h.hashinfo		= tcp_prot.h.hashinfo;
>  	err = proto_register(&subflow_prot, 1);
>  	if (err)
> -- 
> 2.19.1
> 
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [MPTCP] [RFC PATCH v3 08/16] mptcp: Create SUBFLOW socket for incoming connections
@ 2018-10-05 22:59 Mat Martineau
  0 siblings, 0 replies; 5+ messages in thread
From: Mat Martineau @ 2018-10-05 22:59 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 13706 bytes --]

From: Peter Krystad <peter.krystad(a)intel.com>

Add subflow_request_sock type that extends tcp_request_sock
and add an is_mptcp flag to tcp_request_sock to distinguish them.

Override the listen() and accept() methods of the MPTCP
socket proto_ops so they may act on the subflow socket.

Override the conn_request() and syn_recv_sock() handlers
in the subflow's inet_connection_sock_af_ops to handle incoming
MPTCP SYNs and the ACK to the SYN-ACK response.

Add handling in tcp_output.c to add MP_CAPABLE to an outgoing
SYN-ACK response for a subflow_request_sock.

Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
---
 include/linux/tcp.h   |   1 +
 include/net/mptcp.h   |  27 ++++++++++
 net/ipv4/tcp_input.c  |   1 +
 net/ipv4/tcp_output.c |  14 +++++
 net/mptcp/options.c   |  14 +++++
 net/mptcp/protocol.c  | 102 ++++++++++++++++++++++++++++++++++---
 net/mptcp/subflow.c   | 115 ++++++++++++++++++++++++++++++++++++++++--
 7 files changed, 264 insertions(+), 10 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7f0dd688376c..b109798482d3 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -148,6 +148,7 @@ struct tcp_request_sock {
 						  * FastOpen it's the seq#
 						  * after data-in-SYN.
 						  */
+	bool				is_mptcp;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 4b08eb4ccc6f..56883a1ee2fe 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -69,11 +69,30 @@ static inline struct subflow_sock *subflow_sk(const struct sock *sk)
 	return (struct subflow_sock *)sk;
 }
 
+struct subflow_request_sock {
+	struct	tcp_request_sock sk;
+	u8	mp_capable : 1,
+		mp_join : 1,
+		checksum : 1,
+		backup : 1,
+		version : 4;
+	u64	local_key;
+	u64	remote_key;
+};
+
+static inline
+struct subflow_request_sock *subflow_rsk(const struct request_sock *rsk)
+{
+	return (struct subflow_request_sock *)rsk;
+}
+
 #ifdef CONFIG_MPTCP
 
 void mptcp_parse_option(const unsigned char *ptr, int opsize,
 			struct tcp_options_received *opt_rx);
 unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
+unsigned int mptcp_synack_options(struct request_sock *req,
+				  u64 *local_key, u64 *remote_key);
 
 void mptcp_finish_connect(struct sock *sk, int mp_capable);
 
@@ -96,6 +115,14 @@ static inline unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
 {
 	return 0;
 }
+static inline unsigned int mptcp_synack_options(struct request_sock *sk,
+						u64 *local_key,
+						u64 *remote_key)
+{
+	return 0;
+}
+
+
 
 #endif /* CONFIG_MPTCP */
 #endif /* __NET_MPTCP_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4cb38904bb5f..9a326729637f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6441,6 +6441,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	tcp_rsk(req)->af_specific = af_ops;
 	tcp_rsk(req)->ts_off = 0;
+	tcp_rsk(req)->is_mptcp = 0;
 
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = af_ops->mss_clamp;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9919793e293b..780abb11dffd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -767,6 +767,20 @@ static unsigned int tcp_synack_options(const struct sock *sk,
 			remaining -= need;
 		}
 	}
+	if (tcp_rsk(req)->is_mptcp) {
+		u64 local_key;
+		u64 remote_key;
+		if (mptcp_synack_options(req, &local_key, &remote_key)) {
+			if (remaining >= TCPOLEN_MPTCP_MPC_SYNACK) {
+				opts->options |= OPTION_MPTCP;
+				opts->suboptions |= OPTION_MPTCP_MPC_SYNACK;
+				opts->sndr_key = local_key;
+				opts->rcvr_key = remote_key;
+				remaining -= TCPOLEN_MPTCP_MPC_SYNACK;
+			}
+		}
+	}
+
 	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
 
 	return MAX_TCP_OPTION_SPACE - remaining;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4b1cbc3b3efe..7e48d1d92aac 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -161,3 +161,17 @@ unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
 	}
 	return subflow->request_mptcp;
 }
+
+unsigned int mptcp_synack_options(struct request_sock *req, u64 *local_key,
+				  u64 *remote_key)
+{
+	struct subflow_request_sock *subflow_req = subflow_rsk(req);
+
+	if (subflow_req->mp_capable) {
+		*local_key = subflow_req->local_key;
+		*remote_key = subflow_req->remote_key;
+		pr_debug("local_key=%llu", *local_key);
+		pr_debug("remote_key=%llu", *remote_key);
+	}
+	return subflow_req->mp_capable;
+}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1a3412a742ea..9f802f69a528 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -80,6 +80,45 @@ static void mptcp_close(struct sock *sk, long timeout)
 	}
 }
 
+static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
+				 bool kern)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *listener = msk->subflow;
+	struct socket *new_sock;
+	struct socket *mp;
+	struct subflow_sock *subflow;
+
+	pr_debug("msk=%p, listener=%p", msk, listener->sk);
+	*err = kernel_accept(listener, &new_sock, flags);
+	if (*err < 0)
+		return NULL;
+
+	subflow = subflow_sk(new_sock->sk);
+	pr_debug("new_sock=%p", subflow);
+
+	*err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_MPTCP, &mp);
+	if (*err < 0) {
+		kernel_sock_shutdown(new_sock, SHUT_RDWR);
+		sock_release(new_sock);
+		return NULL;
+	}
+
+	msk = mptcp_sk(mp->sk);
+	pr_debug("msk=%p", msk);
+	subflow->conn = mp->sk;
+
+	if (subflow->mp_capable) {
+		msk->remote_key = subflow->remote_key;
+		msk->local_key = subflow->local_key;
+		msk->connection_list = new_sock;
+	} else {
+		msk->subflow = new_sock;
+	}
+
+	return mp->sk;
+}
+
 static int mptcp_get_port(struct sock *sk, unsigned short snum)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -129,11 +168,16 @@ static int subflow_create(struct sock *sock)
 int mptcp_stream_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
-	struct socket *subflow = msk->subflow;
+	int err;
 
-	pr_debug("msk=%p, subflow=%p", msk, subflow->sk);
+	pr_debug("msk=%p", msk);
 
-	return inet_bind(subflow, uaddr, addr_len);
+	if (msk->subflow == NULL) {
+		err = subflow_create(sock->sk);
+		if (err)
+			return err;
+	}
+	return inet_bind(msk->subflow, uaddr, addr_len);
 }
 
 int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
@@ -153,12 +197,56 @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
 }
 
+int mptcp_stream_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
+{
+	struct mptcp_sock *msk = mptcp_sk(sock->sk);
+	struct socket *subflow;
+	int err = -EPERM;
+
+	if (msk->connection_list)
+		subflow = msk->connection_list;
+	else
+		subflow = msk->subflow;
+
+	err = inet_getname(subflow, uaddr, peer);
+
+	return err;
+}
+
+int mptcp_stream_listen(struct socket *sock, int backlog)
+{
+	struct mptcp_sock *msk = mptcp_sk(sock->sk);
+	int err;
+
+	pr_debug("msk=%p", msk);
+
+	if (msk->subflow == NULL) {
+		err = subflow_create(sock->sk);
+		if (err)
+			return err;
+	}
+	return inet_listen(msk->subflow, backlog);
+}
+
+int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags,
+			bool kern)
+{
+	struct mptcp_sock *msk = mptcp_sk(sock->sk);
+
+	pr_debug("msk=%p", msk);
+
+	if (msk->subflow == NULL) {
+		return -EINVAL;
+	}
+	return inet_accept(sock, newsock, flags, kern);
+}
+
 static struct proto mptcp_prot = {
 	.name		= "MPTCP",
 	.owner		= THIS_MODULE,
 	.init		= mptcp_init_sock,
 	.close		= mptcp_close,
-	.accept		= inet_csk_accept,
+	.accept		= mptcp_accept,
 	.shutdown	= tcp_shutdown,
 	.sendmsg	= mptcp_sendmsg,
 	.recvmsg	= mptcp_recvmsg,
@@ -176,11 +264,11 @@ const struct proto_ops mptcp_stream_ops = {
 	.bind		   = mptcp_stream_bind,
 	.connect	   = mptcp_stream_connect,
 	.socketpair	   = sock_no_socketpair,
-	.accept		   = inet_accept,
-	.getname	   = inet_getname,
+	.accept		   = mptcp_stream_accept,
+	.getname	   = mptcp_stream_getname,
 	.poll		   = tcp_poll,
 	.ioctl		   = inet_ioctl,
-	.listen		   = inet_listen,
+	.listen		   = mptcp_stream_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5e5fdcb3175f..89fcc3b746eb 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -53,6 +53,40 @@ static int subflow_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 }
 
+static void subflow_v4_init_req(struct request_sock *req,
+				const struct sock *sk_listener,
+				struct sk_buff *skb)
+{
+	struct subflow_request_sock *subflow_req = subflow_rsk(req);
+	struct subflow_sock *listener = subflow_sk(sk_listener);
+	struct tcp_options_received rx_opt;
+
+	tcp_rsk(req)->is_mptcp = 1;
+	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
+
+	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
+
+	rx_opt.mptcp.flags = 0;
+	rx_opt.mptcp.mp_capable = 0;
+	rx_opt.mptcp.mp_join = 0;
+	rx_opt.mptcp.dss = 0;
+	mptcp_get_options(skb, &rx_opt);
+
+	if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
+		subflow_req->mp_capable = 1;
+		if (rx_opt.mptcp.version >= listener->version)
+			subflow_req->version = listener->version;
+		else
+			subflow_req->version = rx_opt.mptcp.version;
+		if ((rx_opt.mptcp.flags & MPTCP_CAP_CHECKSUM_REQD) ||
+		    listener->checksum)
+			subflow_req->checksum = 1;
+		subflow_req->remote_key = rx_opt.mptcp.sndr_key;
+	} else {
+		subflow_req->mp_capable = 0;
+	}
+}
+
 static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 {
 	struct subflow_sock *subflow = subflow_sk(sk);
@@ -68,13 +102,66 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
+static struct request_sock_ops subflow_request_sock_ops;
+static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
+
+static int subflow_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);
+
+	pr_debug("subflow=%p", subflow);
+
+	/* Never answer to SYNs sent to broadcast or multicast */
+	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+		goto drop;
+
+	return tcp_conn_request(&subflow_request_sock_ops,
+				&subflow_request_sock_ipv4_ops,
+				sk, skb);
+drop:
+	tcp_listendrop(sk);
+	return 0;
+}
+
+static struct sock *subflow_syn_recv_sock(const struct sock *sk,
+					  struct sk_buff *skb,
+					  struct request_sock *req,
+					  struct dst_entry *dst,
+					  struct request_sock *req_unhash,
+					  bool *own_req)
+{
+	struct subflow_sock *listener = subflow_sk(sk);
+	struct subflow_request_sock *subflow_req = subflow_rsk(req);
+	struct sock *child;
+
+	pr_debug("listener=%p, req=%p, conn=%p", sk, req, listener->conn);
+
+	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
+
+	if (child) {
+		struct subflow_sock *subflow = subflow_sk(child);
+
+		pr_debug("child=%p", child);
+		if (subflow_req->mp_capable) {
+			subflow->mp_capable = 1;
+			subflow->fourth_ack = 1;
+			subflow->remote_key = subflow_req->remote_key;
+			subflow->local_key = subflow_req->local_key;
+		} else {
+			subflow->mp_capable = 0;
+		}
+	}
+
+	return child;
+}
+
 const struct inet_connection_sock_af_ops subflow_specific = {
 	.queue_xmit	   = ip_queue_xmit,
 	.send_check	   = tcp_v4_send_check,
 	.rebuild_header	   = inet_sk_rebuild_header,
 	.sk_rx_dst_set	   = subflow_finish_connect,
-	.conn_request	   = tcp_v4_conn_request,
-	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
+	.conn_request	   = subflow_conn_request,
+	.syn_recv_sock	   = subflow_syn_recv_sock,
 	.net_header_len	   = sizeof(struct iphdr),
 	.setsockopt	   = ip_setsockopt,
 	.getsockopt	   = ip_getsockopt,
@@ -112,6 +199,21 @@ static void subflow_close(struct sock *sk, long timeout)
 	tcp_close(sk, timeout);
 }
 
+static struct sock *subflow_accept(struct sock *sk, int flags, int *err,
+				   bool kern)
+{
+	struct subflow_sock *subflow = subflow_sk(sk);
+	struct sock *child;
+
+	pr_debug("subflow=%p, conn=%p", subflow, subflow->conn);
+
+	child = inet_csk_accept(sk, flags, err, kern);
+
+	pr_debug("child=%p", child);
+
+	return child;
+}
+
 static void subflow_destroy(struct sock *sk)
 {
 	pr_debug("subflow=%p", sk);
@@ -125,7 +227,7 @@ static struct proto subflow_prot = {
 	.close		= subflow_close,
 	.connect	= subflow_connect,
 	.disconnect	= tcp_disconnect,
-	.accept		= inet_csk_accept,
+	.accept		= subflow_accept,
 	.ioctl		= tcp_ioctl,
 	.init		= subflow_init_sock,
 	.destroy	= subflow_destroy,
@@ -169,7 +271,14 @@ int mptcp_subflow_init(void)
 
 	/* TODO: Register path manager callbacks. */
 
+	subflow_request_sock_ops = tcp_request_sock_ops;
+	subflow_request_sock_ops.obj_size = sizeof(struct subflow_request_sock),
+
+	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
+	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
+
 	subflow_prot.twsk_prot		= tcp_prot.twsk_prot;
+	subflow_prot.rsk_prot		= &subflow_request_sock_ops;
 	subflow_prot.h.hashinfo		= tcp_prot.h.hashinfo;
 	err = proto_register(&subflow_prot, 1);
 	if (err)
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-10-08 21:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-08 21:38 [MPTCP] [RFC PATCH v3 08/16] mptcp: Create SUBFLOW socket for incoming connections Krystad, Peter
  -- strict thread matches above, loose matches on Subject: below --
2018-10-08 20:31 cpaasch
2018-10-08 20:22 Krystad, Peter
2018-10-08 18:22 Christoph Paasch
2018-10-05 22:59 Mat Martineau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.