mptcp.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
From: Dmytro SHYTYI <dmytro@shytyi.net>
To: mptcp@lists.linux.dev
Cc: Dmytro SHYTYI <dmytro@shytyi.net>
Subject: [RFC PATCH mptcp-next v3] mptcp: Fast Open Mechanism
Date: Sun, 22 May 2022 19:39:21 +0100	[thread overview]
Message-ID: <20220522183921.103526-1-dmytro@shytyi.net> (raw)

This set of patches will bring "Fast Open" Option support to MPTCP.
The aim of Fast Open Mechanism is to eliminate one round trip
time from a TCP conversation by allowing data to be included as
part of the SYN segment that initiates the connection.

IETF RFC 8684: Appendix B.  TCP Fast Open and MPTCP.

[PATCH v3] includes "client-server" partial support for :
1. MPTCP cookie request from client.
2. MPTCP cookie offering from server.
3. MPTCP SYN+DATA+COOKIE from client.
4. subsequent write + read on the opened socket.

This patch is Work In Progress transitional draft. There was a pause
in code development that was unpaused recently. Now this code is based
on the top of mptcp-next branch. The option below will be modified in
future inelligently, depending on socket type (TCP||MPTCP):
*tcp_options ^= OPTION_TS
You also might notice some of commented pieces of the upstream code -
that (is probably not good) and was done to observe an
expected behavior of MPTCP Fast Open mechanism. Any comments how
to achive the same behavior of MPTCP_FO without commenting the related
parts of the code are welcome.

Signed-off-by: Dmytro SHYTYI <dmytro@shytyi.net>
---
 include/net/mptcp.h     |  2 +-
 net/ipv4/tcp_fastopen.c |  4 +++
 net/ipv4/tcp_input.c    |  7 ++---
 net/ipv4/tcp_output.c   |  3 +--
 net/mptcp/options.c     |  8 ++++--
 net/mptcp/protocol.c    | 59 ++++++++++++++++++++++++++++++++++++++---
 net/mptcp/sockopt.c     | 41 ++++++++++++++++++++++++++++
 net/mptcp/subflow.c     |  9 ++++---
 8 files changed, 118 insertions(+), 15 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6456ea26e4c7..692197187af8 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -139,7 +139,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space);
 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
 		       unsigned int *size, struct mptcp_out_options *opts);
 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
-			  struct mptcp_out_options *opts);
+			  struct mptcp_out_options *opts, u16 *tcp_options);
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 			       unsigned int *size, unsigned int remaining,
 			       struct mptcp_out_options *opts);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdbcf2a6d08e..f5f189e4d15a 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -346,8 +346,10 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct tcp_fastopen_cookie *foc,
 			      const struct dst_entry *dst)
 {
+	/*
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
 	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+	*/
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sock *child;
 	int ret = 0;
@@ -355,12 +357,14 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
 
+	/*
 	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
 	      (syn_data || foc->len >= 0) &&
 	      tcp_fastopen_queue_check(sk))) {
 		foc->len = -1;
 		return NULL;
 	}
+	*/
 
 	if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
 		goto fastopen;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3231af73e430..38119b96171d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6273,9 +6273,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		}
 		if (fastopen_fail)
 			return -1;
-		if (sk->sk_write_pending ||
-		    icsk->icsk_accept_queue.rskq_defer_accept ||
-		    inet_csk_in_pingpong_mode(sk)) {
+
+		if (!sk_is_mptcp(sk) && (sk->sk_write_pending ||
+		     icsk->icsk_accept_queue.rskq_defer_accept ||
+		     inet_csk_in_pingpong_mode(sk))) {
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
 			 *
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4b2284ed4a2..864517e63bdf 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -747,7 +747,7 @@ static void mptcp_set_option_cond(const struct request_sock *req,
 	if (rsk_is_mptcp(req)) {
 		unsigned int size;
 
-		if (mptcp_synack_options(req, &size, &opts->mptcp)) {
+		if (mptcp_synack_options(req, &size, &opts->mptcp, &opts->options)) {
 			if (*remaining >= size) {
 				opts->options |= OPTION_MPTCP;
 				*remaining -= size;
@@ -822,7 +822,6 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 			tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
 		}
 	}
-
 	smc_set_option(tp, opts, &remaining);
 
 	if (sk_is_mptcp(sk)) {
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index be3b918a6d15..ebcb9c04ead9 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -887,16 +887,20 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 }
 
 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
-			  struct mptcp_out_options *opts)
+			  struct mptcp_out_options *opts, u16 *tcp_options)
 {
 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+#define OPTION_TS               BIT(1)
+
+
+        *tcp_options ^= OPTION_TS;
 
 	if (subflow_req->mp_capable) {
 		opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
 		opts->sndr_key = subflow_req->local_key;
 		opts->csum_reqd = subflow_req->csum_reqd;
 		opts->allow_join_id0 = subflow_req->allow_join_id0;
-		*size = TCPOLEN_MPTCP_MPC_SYNACK;
+		*size = TCPOLEN_MPTCP_MPC_SYNACK  - TCPOLEN_TSTAMP_ALIGNED + TCPOLEN_SACKPERM_ALIGNED;
 		pr_debug("subflow_req=%p, local_key=%llu",
 			 subflow_req, subflow_req->local_key);
 		return true;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d6aef4b13b8a..6649088baae5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -54,6 +54,8 @@ static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_sm
 
 static void __mptcp_destroy_sock(struct sock *sk);
 static void __mptcp_check_send_data_fin(struct sock *sk);
+static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+				int addr_len, int flags);
 
 DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 static struct net_device mptcp_napi_dev;
@@ -1673,6 +1675,53 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 	}
 }
 
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+				  size_t len, struct mptcp_sock *msk, size_t copied)
+{
+	const struct iphdr *iph;
+	struct ubuf_info *uarg;
+	struct sockaddr *uaddr;
+	struct sk_buff *skb;
+	struct tcp_sock *tp;
+	struct socket *ssk;
+	int ret;
+
+	ssk = __mptcp_nmpc_socket(msk);
+	if (unlikely(!ssk))
+		goto out_EFAULT;
+	skb = tcp_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true);
+	if (unlikely(!skb))
+		goto out_EFAULT;
+	iph = ip_hdr(skb);
+	if (unlikely(!iph))
+		goto out_EFAULT;
+	uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb));
+	if (unlikely(!uarg))
+		goto out_EFAULT;
+	uaddr = msg->msg_name;
+
+	tp = tcp_sk(ssk->sk);
+	if (unlikely(!tp))
+		goto out_EFAULT;
+	if (!tp->fastopen_req)
+		tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), ssk->sk->sk_allocation);
+
+	if (unlikely(!tp->fastopen_req))
+		goto out_EFAULT;
+	tp->fastopen_req->data = msg;
+	tp->fastopen_req->size = len;
+	tp->fastopen_req->uarg = uarg;
+
+	/* requests a cookie */
+	ret = mptcp_stream_connect(sk->sk_socket, uaddr,
+				   msg->msg_namelen, msg->msg_flags);
+
+	return ret;
+out_EFAULT:
+	ret = -EFAULT;
+	return ret;
+}
+
 static void mptcp_set_nospace(struct sock *sk)
 {
 	/* enable autotune */
@@ -1690,9 +1739,9 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int ret = 0;
 	long timeo;
 
-	/* we don't support FASTOPEN yet */
+	/* we don't fully support FASTOPEN yet */
 	if (msg->msg_flags & MSG_FASTOPEN)
-		return -EOPNOTSUPP;
+		ret = mptcp_sendmsg_fastopen(sk, msg, len, msk, copied);
 
 	/* silently ignore everything else */
 	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
@@ -2558,10 +2607,10 @@ static void mptcp_worker(struct work_struct *work)
 
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 		__mptcp_close_subflow(msk);
-
+/*
 	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
 		__mptcp_retrans(sk);
-
+*/
 	mptcp_mp_fail_no_response(msk);
 
 unlock:
@@ -2681,6 +2730,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 	case TCP_SYN_SENT:
 		tcp_disconnect(ssk, O_NONBLOCK);
 		break;
+	case TCP_ESTABLISHED:
+		break;
 	default:
 		if (__mptcp_check_fallback(mptcp_sk(sk))) {
 			pr_debug("Fallback");
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 423d3826ca1e..e1ae1ef224cf 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -560,6 +560,8 @@ static bool mptcp_supported_sockopt(int level, int optname)
 		case TCP_TX_DELAY:
 		case TCP_INQ:
 			return true;
+		case TCP_FASTOPEN:
+			return true;
 		}
 
 		/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */
@@ -768,6 +770,43 @@ static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optv
 	return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen);
 }
 
+static int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval,
+					     unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	struct net *net = sock_net(sk);
+	int val;
+	int ret;
+
+	ret = 0;
+
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		lock_sock(ssk);
+
+		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+		    TCPF_LISTEN))) {
+			tcp_fastopen_init_key_once(net);
+			fastopen_queue_tune(sk, val);
+		} else {
+			ret = -EINVAL;
+		}
+
+		release_sock(ssk);
+	}
+
+	release_sock(sk);
+
+	return ret;
+}
+
 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 				    sockptr_t optval, unsigned int optlen)
 {
@@ -796,6 +835,8 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 		return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
 	case TCP_DEFER_ACCEPT:
 		return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen);
+	case TCP_FASTOPEN:
+		return mptcp_setsockopt_sol_tcp_fastopen(msk, optval, optlen);
 	}
 
 	return -EOPNOTSUPP;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8841e8cd9ad8..f732e41e12df 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1002,16 +1002,17 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 			sk_eat_skb(ssk, skb);
 			return MAPPING_EMPTY;
 		}
-
+/*
 		if (!subflow->map_valid)
 			return MAPPING_INVALID;
-
+*/
 		goto validate_seq;
 	}
 
 	trace_get_mapping_status(mpext);
 
 	data_len = mpext->data_len;
+
 	if (data_len == 0) {
 		pr_debug("infinite mapping received");
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
@@ -1075,6 +1076,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 		/* If this skb data are fully covered by the current mapping,
 		 * the new map would need caching, which is not supported
 		 */
+
 		if (skb_is_fully_mapped(ssk, skb)) {
 			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
 			return MAPPING_INVALID;
@@ -1107,11 +1109,12 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 	/* we revalidate valid mapping on new skb, because we must ensure
 	 * the current skb is completely covered by the available mapping
 	 */
+	/*
 	if (!validate_mapping(ssk, skb)) {
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH);
 		return MAPPING_INVALID;
 	}
-
+	*/
 	skb_ext_del(skb, SKB_EXT_MPTCP);
 
 validate_csum:
-- 
2.25.1



             reply	other threads:[~2022-05-22 18:40 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-22 18:39 Dmytro SHYTYI [this message]
2022-05-22 18:47 ` mptcp: Fast Open Mechanism: Build Failure MPTCP CI
2022-05-22 18:59 ` mptcp: Fast Open Mechanism: Tests Results MPTCP CI
2022-05-25  1:04 ` [RFC PATCH mptcp-next v3] mptcp: Fast Open Mechanism Mat Martineau
2022-05-26 21:32   ` Mat Martineau
2022-06-05  1:09   ` Dmytro SHYTYI
2022-06-08  0:38     ` Mat Martineau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220522183921.103526-1-dmytro@shytyi.net \
    --to=dmytro@shytyi.net \
    --cc=mptcp@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).