mptcp.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [MPTCP] [RFC PATCH 2/4] tcp: move selected mptcp helpers to tcp.h/mptcp.h
@ 2020-09-24 14:35 ` Florian Westphal
  2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
  0 siblings, 1 reply; 20+ messages in thread
From: Florian Westphal @ 2020-09-24 14:35 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 2755 bytes --]

Will be needed to fill in the MPTCP reset option from tcp_ipv4/ipv6.c.

It would make more sense to place mptcp_option() in mptcp.h, but
TCPOPT_MPTCP is defined in tcp.h, and mptcp.h is included from tcp.h,
not the other way around.  Placing the helper in mptcp.h thus results
in a build failure because TCPOPT_MPTCP is not defined.

Signed-off-by: Florian Westphal <fw(a)strlen.de>
---
 include/net/mptcp.h  | 10 ++++++++++
 include/net/tcp.h    |  5 +++++
 net/mptcp/protocol.h | 11 -----------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6e706d838e4e..5f5062580e0e 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -68,6 +68,11 @@ static inline bool sk_is_mptcp(const struct sock *sk)
 	return tcp_sk(sk)->is_mptcp;
 }
 
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
+}
+
 static inline bool rsk_is_mptcp(const struct request_sock *req)
 {
 	return tcp_rsk(req)->is_mptcp;
@@ -153,6 +158,11 @@ static inline void mptcp_init(void)
 {
 }
 
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+	return NULL;
+}
+
 static inline bool sk_is_mptcp(const struct sock *sk)
 {
 	return false;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 852f0d71dd40..ea8c134802e8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2406,4 +2406,9 @@ static inline u64 tcp_transmit_time(const struct sock *sk)
 	return 0;
 }
 
+static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
+{
+	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
+		     ((nib & 0xF) << 8) | field);
+}
 #endif	/* _TCP_H */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7cfe52aeb2b8..e8c873c66182 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -130,12 +130,6 @@ struct mptcp_options_received {
 	u16	port;
 };
 
-static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
-{
-	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
-		     ((nib & 0xF) << 8) | field);
-}
-
 struct mptcp_addr_info {
 	sa_family_t		family;
 	__be16			port;
@@ -486,11 +480,6 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
 void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 
-static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
-{
-	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
-}
-
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
 	return (__s64)(seq1 - seq2) < 0;
-- 
2.26.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [MPTCP] [RFC PATCH 4/4] tcp: parse tcp options contained in reset packets
@ 2020-09-24 14:35 ` Florian Westphal
  2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
  0 siblings, 1 reply; 20+ messages in thread
From: Florian Westphal @ 2020-09-24 14:35 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 3405 bytes --]

This will be used to handle MPTCP_TCPRST suboption.

It allows an MPTCP receiver to learn more information when a subflow is
re-set.  The MPTCP_TCPRST option gives an error code (protocol error,
path too slow, middlebox interference detected, and so on).

This allows an MPTCP receiver to make a decision to reopen the subflow
at a later time, or even completely disable the path.

Signed-off-by: Florian Westphal <fw(a)strlen.de>
---
 include/net/tcp.h        |  2 +-
 net/ipv4/tcp_input.c     | 13 ++++++++-----
 net/ipv4/tcp_minisocks.c |  2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a981b5d60112..92eee154e2a3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -609,7 +609,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 /* tcp_input.c */
 void tcp_rearm_rto(struct sock *sk);
 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
-void tcp_reset(struct sock *sk);
+void tcp_reset(struct sock *sk, struct sk_buff *skb);
 void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
 void tcp_fin(struct sock *sk);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8afa4af30fdc..0a10ba1df1a0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4206,10 +4206,13 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 }
 
 /* When we get a reset we do this. */
-void tcp_reset(struct sock *sk)
+void tcp_reset(struct sock *sk, struct sk_buff *skb)
 {
 	trace_tcp_receive_reset(sk);
 
+	if (sk_is_mptcp(sk))
+		mptcp_incoming_options(sk, skb);
+
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
 	case TCP_SYN_SENT:
@@ -5590,7 +5593,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 						  &tp->last_oow_ack_time))
 				tcp_send_dupack(sk, skb);
 		} else if (tcp_reset_check(sk, skb)) {
-			tcp_reset(sk);
+			tcp_reset(sk, skb);
 		}
 		goto discard;
 	}
@@ -5626,7 +5629,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 		}
 
 		if (rst_seq_match)
-			tcp_reset(sk);
+			tcp_reset(sk, skb);
 		else {
 			/* Disable TFO if RST is out-of-order
 			 * and no data has been received
@@ -6059,7 +6062,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 
 		if (th->rst) {
-			tcp_reset(sk);
+			tcp_reset(sk, skb);
 			goto discard;
 		}
 
@@ -6501,7 +6504,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
 				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
-				tcp_reset(sk);
+				tcp_reset(sk, skb);
 				return 1;
 			}
 		}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 56c306e3cd2f..12f2495f98df 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -802,7 +802,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		req->rsk_ops->send_reset(sk, skb);
 	} else if (fastopen) { /* received a valid RST pkt */
 		reqsk_fastopen_remove(sk, req, true);
-		tcp_reset(sk);
+		tcp_reset(sk, skb);
 	}
 	if (!fastopen) {
 		inet_csk_reqsk_queue_drop(sk, req);
-- 
2.26.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [MPTCP] [RFC mptpcp-next] mptcp: add ooo prune support
@ 2020-10-02 15:45 ` Florian Westphal
  2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
  0 siblings, 1 reply; 20+ messages in thread
From: Florian Westphal @ 2020-10-02 15:45 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 4557 bytes --]

It might be possible that entire receive buffer is occupied by
skbs in the OOO queue.

In this case we can't pull more skbs from subflows and the holes
will never be filled.

If this happens, schedule the work queue and prune ~12% of skbs to
make space available. Also add a MIB counter for this.

Signed-off-by: Florian Westphal <fw(a)strlen.de>
---
 Paolo, this does relate a bit to our discussion wrt. oow
 tracking.  I thought we might need to add some sort of cushion to
 account for window discrepancies, but that might then get us
 in a state where wmem might be full...

 What do you think?

 I did NOT see such a problem in practice, this is a theoretical "fix".
 TCP has similar code to deal with corner cases of small-oow packets.

 net/mptcp/mib.c      |  1 +
 net/mptcp/mib.h      |  1 +
 net/mptcp/protocol.c | 48 ++++++++++++++++++++++++++++++++++++++++++--
 net/mptcp/protocol.h |  1 +
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 84d119436b22..65c575e3af60 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
 	SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
 	SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
+	SNMP_MIB_ITEM("OFOPrune", MPTCP_MIB_OFOPRUNE),
 	SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW),
 	SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
 	SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 47bcecce1106..75a7fb3a87db 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_OFOQUEUETAIL,	/* Segments inserted into OoO queue tail */
 	MPTCP_MIB_OFOQUEUE,		/* Segments inserted into OoO queue */
 	MPTCP_MIB_OFOMERGE,		/* Segments merged in OoO queue */
+	MPTCP_MIB_OFOPRUNE,		/* Segments pruned from OoO queue */
 	MPTCP_MIB_NODSSWINDOW,		/* Segments not in MPTCP windows */
 	MPTCP_MIB_DUPDATA,		/* Segments discarded due to duplicate DSS */
 	MPTCP_MIB_ADDADDR,		/* Received ADD_ADDR with echo-flag=0 */
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 79cd8e879c10..4cc30a3d426c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -658,8 +658,17 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 		sk_rbuf = ssk_rbuf;
 
 	/* over limit? can't append more skbs to msk */
-	if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
-		goto wake;
+	if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) {
+		if (likely(!skb_queue_empty(&sk->sk_receive_queue)))
+			goto wake;
+
+		/* Entire recvbuf occupied by OOO skbs? Prune time. */
+		if (!test_and_set_bit(MPTCP_WORK_PRUNE_OFO, &msk->flags) &&
+		     schedule_work(&msk->work))
+			sock_hold(sk);
+
+		return;
+	}
 
 	if (move_skbs_to_msk(msk, ssk))
 		goto wake;
@@ -1797,6 +1806,38 @@ static bool mptcp_check_close_timeout(const struct sock *sk)
 	return true;
 }
 
+static void mptcp_prune_ofo(struct mptcp_sock *msk)
+{
+	struct sock *sk = &msk->sk.icsk_inet.sk;
+	struct sk_buff *skb, *prev = NULL;
+	int goal;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	    atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return;
+
+	if (WARN_ON_ONCE(RB_EMPTY_ROOT(&msk->out_of_order_queue)))
+		return;
+
+	MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOPRUNE);
+
+	goal = READ_ONCE(sk->sk_rcvbuf) >> 3;
+	skb = msk->ooo_last_skb;
+
+	while (skb) {
+		prev = skb_rb_prev(skb);
+		rb_erase(&skb->rbnode, &msk->out_of_order_queue);
+		goal -= skb->truesize;
+		mptcp_drop(sk, skb);
+
+		if (goal <= 0)
+			break;
+		skb = prev;
+	}
+
+	msk->ooo_last_skb = prev;
+}
+
 static void mptcp_worker(struct work_struct *work)
 {
 	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -1819,6 +1860,9 @@ static void mptcp_worker(struct work_struct *work)
 	if (mptcp_send_head(sk))
 		mptcp_push_pending(sk, 0);
 
+	if (test_and_clear_bit(MPTCP_WORK_PRUNE_OFO, &msk->flags))
+		mptcp_prune_ofo(msk);
+
 	if (msk->pm.status)
 		pm_work(msk);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d33e9676a1a3..360441fdaa93 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -91,6 +91,7 @@
 #define MPTCP_WORK_EOF		3
 #define MPTCP_FALLBACK_DONE	4
 #define MPTCP_WORKER_RUNNING	5
+#define MPTCP_WORK_PRUNE_OFO	6
 
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
-- 
2.26.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [MPTCP] [PATCH MPTCP 1/5] tcp: make two mptcp helpers available to tcp stack
@ 2020-11-05 17:01 ` Florian Westphal
  2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
  0 siblings, 1 reply; 20+ messages in thread
From: Florian Westphal @ 2020-11-05 17:01 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 2820 bytes --]

needed by followup patches to add mptcp reset (and fastclose)
options to tcp reset packets.

Signed-off-by: Florian Westphal <fw(a)strlen.de>
---
 include/net/mptcp.h  | 10 ++++++++++
 include/net/tcp.h    |  5 +++++
 net/mptcp/protocol.h | 11 -----------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index b6cf07143a8a..3d57607982fa 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -78,6 +78,11 @@ static inline bool rsk_drop_req(const struct request_sock *req)
 	return tcp_rsk(req)->is_mptcp && tcp_rsk(req)->drop_req;
 }
 
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
+}
+
 void mptcp_space(const struct sock *ssk, int *space, int *full_space);
 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
 		       unsigned int *size, struct mptcp_out_options *opts);
@@ -169,6 +174,11 @@ static inline bool rsk_drop_req(const struct request_sock *req)
 	return false;
 }
 
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+	return NULL;
+}
+
 static inline void mptcp_parse_option(const struct sk_buff *skb,
 				      const unsigned char *ptr, int opsize,
 				      struct tcp_options_received *opt_rx)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f3d42cb626fc..8115164e0df6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2377,4 +2377,9 @@ static inline u64 tcp_transmit_time(const struct sock *sk)
 	return 0;
 }
 
+static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
+{
+	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
+		     ((nib & 0xF) << 8) | field);
+}
 #endif	/* _TCP_H */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d29c6a4749eb..66bd4d096753 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -139,12 +139,6 @@ struct mptcp_options_received {
 	u16	port;
 };
 
-static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
-{
-	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
-		     ((nib & 0xF) << 8) | field);
-}
-
 struct mptcp_addr_info {
 	sa_family_t		family;
 	__be16			port;
@@ -568,11 +562,6 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
 void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 
-static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
-{
-	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
-}
-
 void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
 
 static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)
-- 
2.26.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [MPTCP] [PATCH MPTCP 5/5] mptcp: send fastclose if userspace closes socket with unread data
@ 2020-11-05 17:01 ` Florian Westphal
  2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
  0 siblings, 1 reply; 20+ messages in thread
From: Florian Westphal @ 2020-11-05 17:01 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 4648 bytes --]

Add building & sending of FASTCLOSE option.
RFC 8684 describes two methods:

A): Host sends an ACK containing the MP_FASTCLOSE
    option on one subflow [..] On all the other subflows,
    Host A sends a regular TCP RST to close these subflows and tears
    them down. [..]

R): Host A sends a RST containing the MP_FASTCLOSE option on all
    subflows [..].  Host A can tear down the subflows and the
    connection immediately.

This implements option R) only: All subflows are re-set with FASTCLOSE.

Signed-off-by: Florian Westphal <fw(a)strlen.de>
---
 net/mptcp/options.c  | 35 +++++++++++++++++++++++++++++++++++
 net/mptcp/protocol.c | 32 +++++++++++++++++++++++++++++++-
 net/mptcp/protocol.h |  1 +
 3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 0a940687f738..08b60d527de0 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -681,6 +681,31 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
 	return true;
 }
 
+static bool mptcp_fastclose(const struct mptcp_sock *msk)
+{
+	return READ_ONCE(msk->snd_fastclose);
+}
+
+static bool mptcp_established_options_fastclose(struct sock *sk,
+						unsigned int *size,
+						unsigned int remaining,
+						struct mptcp_out_options *opts)
+{
+	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	if (likely(!mptcp_fastclose(mptcp_sk(subflow->conn))))
+		return false;
+
+	if (remaining < TCPOLEN_MPTCP_FASTCLOSE)
+		return false;
+
+	*size = TCPOLEN_MPTCP_FASTCLOSE;
+	opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
+	opts->rcvr_key = subflow->remote_key;
+
+	return true;
+}
+
 static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
 						   unsigned int *size,
 						   unsigned int remaining,
@@ -691,6 +716,9 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
 	if (remaining < TCPOLEN_MPTCP_RST)
 		return;
 
+	if (mptcp_established_options_fastclose(sk, size, remaining, opts))
+		return;
+
 	*size = TCPOLEN_MPTCP_RST;
 	opts->suboptions |= OPTION_MPTCP_RST;
 	opts->reset_transient = subflow->reset_transient;
@@ -1179,6 +1207,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 		ptr += 5;
 	}
 
+	if (OPTION_MPTCP_FASTCLOSE & opts->suboptions) {
+		*ptr++ = mptcp_option(MPTCPOPT_MP_FASTCLOSE,
+				      TCPOLEN_MPTCP_FASTCLOSE, 0, 0);
+		put_unaligned_be64(opts->rcvr_key, ptr);
+		ptr += 2;
+	}
+
 	if (OPTION_MPTCP_RST & opts->suboptions)
 		*ptr++ = mptcp_option(MPTCPOPT_RST,
 				      TCPOLEN_MPTCP_RST,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7e9705943813..6b6efa00cad5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2133,6 +2133,29 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
 	}
 }
 
+static void __mptcp_send_fastclose(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow, *tmp;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	WRITE_ONCE(msk->snd_fastclose, true);
+
+	__mptcp_flush_join_list(msk);
+	__mptcp_clear_xmit(sk);
+
+	WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+
+	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+
+		lock_sock(tcp_sk);
+		subflow->reset_transient = 0;
+		subflow->reset_reason = MPTCP_RST_EMPTCP;
+		mptcp_subflow_reset(tcp_sk);
+		release_sock(tcp_sk);
+	}
+}
+
 static void __mptcp_wr_shutdown(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2185,6 +2208,7 @@ static void mptcp_close(struct sock *sk, long timeout)
 {
 	struct mptcp_subflow_context *subflow;
 	bool do_cancel_work = false;
+	bool send_fin = false;
 
 	lock_sock(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
@@ -2197,7 +2221,13 @@ static void mptcp_close(struct sock *sk, long timeout)
 		goto cleanup;
 	}
 
-	if (mptcp_close_state(sk))
+	send_fin = mptcp_close_state(sk);
+	if (!skb_queue_empty(&sk->sk_receive_queue)) {
+		__mptcp_send_fastclose(sk);
+		send_fin = false;
+	}
+
+	if (send_fin)
 		__mptcp_wr_shutdown(sk);
 
 	sk_stream_wait_close(sk, timeout);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d4c99e091cb9..93352044bff9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -243,6 +243,7 @@ struct mptcp_sock {
 	bool		fully_established;
 	bool		rcv_data_fin;
 	bool		snd_data_fin_enable;
+	bool		snd_fastclose;
 	bool		rcv_fastclose;
 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
 	spinlock_t	join_list_lock;
-- 
2.26.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [MPTCP][PATCH mptcp-next 0/3] MP_FAIL support
@ 2021-05-06  6:39 Geliang Tang
  2021-05-06  6:39 ` [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending Geliang Tang
  0 siblings, 1 reply; 20+ messages in thread
From: Geliang Tang @ 2021-05-06  6:39 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

apply:	export/20210504T064955 +
	"data checksum support" +
	"data checksum support cleanups" +
	"add MP_CAPABLE 'C' flag"

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52

Geliang Tang (3):
  mptcp: MP_FAIL suboption sending
  mptcp: MP_FAIL suboption receiving
  mptcp: send out MP_FAIL when data checksum fail

 include/net/mptcp.h  |  1 +
 net/mptcp/options.c  | 62 +++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/pm.c       | 17 ++++++++++++
 net/mptcp/protocol.h |  7 +++++
 net/mptcp/subflow.c  |  2 ++
 5 files changed, 88 insertions(+), 1 deletion(-)

-- 
2.31.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending
  2021-05-06  6:39 [MPTCP][PATCH mptcp-next 0/3] MP_FAIL support Geliang Tang
@ 2021-05-06  6:39 ` Geliang Tang
  2021-05-06  6:39   ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Geliang Tang
  2021-05-26 16:08   ` [RESEND] [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending Matthieu Baerts
  0 siblings, 2 replies; 20+ messages in thread
From: Geliang Tang @ 2021-05-06  6:39 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added the MP_FAIL suboption sending support.

Add a new flag named send_mp_fail in struct mptcp_subflow_context. If
this flag is set, send out MP_FAIL suboption.

Add a new member fail_seq in struct mptcp_out_options to save the data
sequence number to put into the MP_FAIL suboption.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 include/net/mptcp.h  |  1 +
 net/mptcp/options.c  | 46 +++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/protocol.h |  4 ++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index cb580b06152f..f48d3b5a3fd4 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -72,6 +72,7 @@ struct mptcp_out_options {
 	u32 nonce;
 	u64 thmac;
 	u32 token;
+	u64 fail_seq;
 	u8 hmac[20];
 	struct mptcp_ext ext_copy;
 #endif
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4179287bd647..485c5a77e71b 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -777,6 +777,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
 	opts->reset_reason = subflow->reset_reason;
 }
 
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+					      unsigned int *size,
+					      unsigned int remaining,
+					      struct mptcp_out_options *opts)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	if (!subflow->send_mp_fail)
+		return false;
+
+	if (remaining < TCPOLEN_MPTCP_FAIL)
+		return false;
+
+	*size = TCPOLEN_MPTCP_FAIL;
+	opts->suboptions |= OPTION_MPTCP_FAIL;
+	opts->fail_seq = subflow->fail_seq;
+
+	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+	return true;
+}
+
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 			       unsigned int *size, unsigned int remaining,
 			       struct mptcp_out_options *opts)
@@ -792,8 +814,16 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 	if (unlikely(__mptcp_check_fallback(msk)))
 		return false;
 
+	if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+		*size += opt_size;
+		remaining -= opt_size;
+		ret = true;
+	}
+
 	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
-		mptcp_established_options_rst(sk, skb, size, remaining, opts);
+		mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
+		*size += opt_size;
+		remaining -= opt_size;
 		return true;
 	}
 
@@ -1338,6 +1368,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 				      opts->backup, TCPOPT_NOP);
 	}
 
+	if (OPTION_MPTCP_FAIL & opts->suboptions) {
+		const struct sock *ssk = (const struct sock *)tp;
+		struct mptcp_subflow_context *subflow;
+
+		subflow = mptcp_subflow_ctx(ssk);
+		subflow->send_mp_fail = 0;
+
+		*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+				      TCPOLEN_MPTCP_FAIL,
+				      0, 0);
+		put_unaligned_be64(opts->fail_seq, ptr);
+		ptr += 2;
+	}
+
 	if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
 		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
 				      TCPOLEN_MPTCP_MPJ_SYN,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 41baa2ffc9a9..ff70b3e97dd0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -26,6 +26,7 @@
 #define OPTION_MPTCP_FASTCLOSE	BIT(8)
 #define OPTION_MPTCP_PRIO	BIT(9)
 #define OPTION_MPTCP_RST	BIT(10)
+#define OPTION_MPTCP_FAIL	BIT(11)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE	0
@@ -67,6 +68,7 @@
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
 #define TCPOLEN_MPTCP_FASTCLOSE		12
 #define TCPOLEN_MPTCP_RST		4
+#define TCPOLEN_MPTCP_FAIL		12
 
 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
 
@@ -419,6 +421,7 @@ struct mptcp_subflow_context {
 		mpc_map : 1,
 		backup : 1,
 		send_mp_prio : 1,
+		send_mp_fail : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
 		disposable : 1;	    /* ctx can be free at ulp release time */
@@ -433,6 +436,7 @@ struct mptcp_subflow_context {
 	u8	reset_seen:1;
 	u8	reset_transient:1;
 	u8	reset_reason:4;
+	u64	fail_seq;
 
 	long	delegated_status;
 	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving
  2021-05-06  6:39 ` [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending Geliang Tang
@ 2021-05-06  6:39   ` Geliang Tang
  2021-05-06  6:39     ` [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
                       ` (2 more replies)
  2021-05-26 16:08   ` [RESEND] [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending Matthieu Baerts
  1 sibling, 3 replies; 20+ messages in thread
From: Geliang Tang @ 2021-05-06  6:39 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added handling for receiving MP_FAIL suboption.

Add a new members mp_fail and fail_seq in struct mptcp_options_received.
When MP_FAIL suboption is received, set mp_fail to 1 and save the sequence
number to fail_seq.

Then invoke mptcp_pm_mp_fail_received to deal with the MP_FAIL suboption.
In it, send MP_FAIL + RST and fallback.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/options.c  | 16 ++++++++++++++++
 net/mptcp/pm.c       | 17 +++++++++++++++++
 net/mptcp/protocol.h |  3 +++
 3 files changed, 36 insertions(+)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 485c5a77e71b..9795e3ccf6cc 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -341,6 +341,16 @@ static void mptcp_parse_option(const struct sock *sk,
 		mp_opt->reset_reason = *ptr;
 		break;
 
+	case MPTCPOPT_MP_FAIL:
+		if (opsize != TCPOLEN_MPTCP_FAIL)
+			break;
+
+		ptr += 2;
+		mp_opt->mp_fail = 1;
+		mp_opt->fail_seq = get_unaligned_be64(ptr);
+		pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
+		break;
+
 	default:
 		break;
 	}
@@ -367,6 +377,7 @@ void mptcp_get_options(const struct sock *sk,
 	mp_opt->reset = 0;
 	mp_opt->csum_reqd = 0;
 	mp_opt->deny_join_id0 = 0;
+	mp_opt->mp_fail = 0;
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
@@ -1127,6 +1138,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		mp_opt.mp_prio = 0;
 	}
 
+	if (mp_opt.mp_fail) {
+		mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+		mp_opt.mp_fail = 0;
+	}
+
 	if (mp_opt.reset) {
 		subflow->reset_seen = 1;
 		subflow->reset_reason = mp_opt.reset_reason;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 639271e09604..87152a2bcbc4 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -247,6 +247,23 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
 	mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
 }
 
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+	pr_debug("map_seq=%llu fail_seq=%llu %llu", subflow->map_seq, subflow->fail_seq, fail_seq);
+
+	if (subflow->fail_seq != fail_seq) {
+		subflow->send_mp_fail = 1;
+		subflow->fail_seq = fail_seq;
+		mptcp_subflow_reset(sk);
+	}
+
+	pr_fallback(msk);
+	__mptcp_do_fallback(msk);
+}
+
 /* path manager helpers */
 
 bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ff70b3e97dd0..fef6adef3c99 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -138,6 +138,7 @@ struct mptcp_options_received {
 		add_addr : 1,
 		rm_addr : 1,
 		mp_prio : 1,
+		mp_fail : 1,
 		echo : 1,
 		csum_reqd : 1,
 		backup : 1,
@@ -159,6 +160,7 @@ struct mptcp_options_received {
 	u64	ahmac;
 	u8	reset_reason:4;
 	u8	reset_transient:1;
+	u64	fail_seq;
 };
 
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -697,6 +699,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 				 struct mptcp_addr_info *addr,
 				 u8 bkup);
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
 void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
 struct mptcp_pm_add_entry *
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail
  2021-05-06  6:39   ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Geliang Tang
@ 2021-05-06  6:39     ` Geliang Tang
  2021-05-08  0:54       ` Mat Martineau
  2021-05-26 16:08       ` [RESEND] " Matthieu Baerts
  2021-05-08  0:44     ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Mat Martineau
  2021-05-26 16:08     ` [RESEND] " Matthieu Baerts
  2 siblings, 2 replies; 20+ messages in thread
From: Geliang Tang @ 2021-05-06  6:39 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

When a bad checksum is detected, send out the MP_FAIL suboption.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/subflow.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 659b8842ae3b..efd84ff61015 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -906,6 +906,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
 	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
 	if (unlikely(csum_fold(csum))) {
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCSUMERR);
+		subflow->send_mp_fail = 1;
+		subflow->fail_seq = subflow->map_seq;
 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
 	}
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving
  2021-05-06  6:39   ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Geliang Tang
  2021-05-06  6:39     ` [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
@ 2021-05-08  0:44     ` Mat Martineau
  2021-05-26 16:08     ` [RESEND] " Matthieu Baerts
  2 siblings, 0 replies; 20+ messages in thread
From: Mat Martineau @ 2021-05-08  0:44 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Thu, 6 May 2021, Geliang Tang wrote:

> This patch added handling for receiving MP_FAIL suboption.
>
> Add a new members mp_fail and fail_seq in struct mptcp_options_received.
> When MP_FAIL suboption is received, set mp_fail to 1 and save the sequence
> number to fail_seq.
>
> Then invoke mptcp_pm_mp_fail_received to deal with the MP_FAIL suboption.
> In it, send MP_FAIL + RST and fallback.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---
> net/mptcp/options.c  | 16 ++++++++++++++++
> net/mptcp/pm.c       | 17 +++++++++++++++++
> net/mptcp/protocol.h |  3 +++
> 3 files changed, 36 insertions(+)
>
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index 485c5a77e71b..9795e3ccf6cc 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -341,6 +341,16 @@ static void mptcp_parse_option(const struct sock *sk,
> 		mp_opt->reset_reason = *ptr;
> 		break;
>
> +	case MPTCPOPT_MP_FAIL:
> +		if (opsize != TCPOLEN_MPTCP_FAIL)
> +			break;
> +
> +		ptr += 2;
> +		mp_opt->mp_fail = 1;
> +		mp_opt->fail_seq = get_unaligned_be64(ptr);
> +		pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
> +		break;
> +
> 	default:
> 		break;
> 	}
> @@ -367,6 +377,7 @@ void mptcp_get_options(const struct sock *sk,
> 	mp_opt->reset = 0;
> 	mp_opt->csum_reqd = 0;
> 	mp_opt->deny_join_id0 = 0;
> +	mp_opt->mp_fail = 0;
>
> 	length = (th->doff * 4) - sizeof(struct tcphdr);
> 	ptr = (const unsigned char *)(th + 1);
> @@ -1127,6 +1138,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
> 		mp_opt.mp_prio = 0;
> 	}
>
> +	if (mp_opt.mp_fail) {
> +		mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
> +		mp_opt.mp_fail = 0;
> +	}
> +
> 	if (mp_opt.reset) {
> 		subflow->reset_seen = 1;
> 		subflow->reset_reason = mp_opt.reset_reason;
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index 639271e09604..87152a2bcbc4 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -247,6 +247,23 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
> 	mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
> }
>
> +void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
> +{
> +	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> +	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> +
> +	pr_debug("map_seq=%llu fail_seq=%llu %llu", subflow->map_seq, subflow->fail_seq, fail_seq);
> +
> +	if (subflow->fail_seq != fail_seq) {
> +		subflow->send_mp_fail = 1;
> +		subflow->fail_seq = fail_seq;
> +		mptcp_subflow_reset(sk);
> +	}
> +
> +	pr_fallback(msk);
> +	__mptcp_do_fallback(msk);

Note that RFC 8684 section 3.7 says that fallback is only required if 
there is a single active subflow.

The RFC also says that data following fail_seq should be discarded when 
there are multiple subflows open. It's similar to a forced checksum 
failure - the queued data that has already been received on the subflow 
that received the MP_FAIL is discarded and not DATA_ACKed.

-Mat


> +}
> +
> /* path manager helpers */
>
> bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index ff70b3e97dd0..fef6adef3c99 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -138,6 +138,7 @@ struct mptcp_options_received {
> 		add_addr : 1,
> 		rm_addr : 1,
> 		mp_prio : 1,
> +		mp_fail : 1,
> 		echo : 1,
> 		csum_reqd : 1,
> 		backup : 1,
> @@ -159,6 +160,7 @@ struct mptcp_options_received {
> 	u64	ahmac;
> 	u8	reset_reason:4;
> 	u8	reset_transient:1;
> +	u64	fail_seq;
> };
>
> static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
> @@ -697,6 +699,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
> int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
> 				 struct mptcp_addr_info *addr,
> 				 u8 bkup);
> +void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
> void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
> bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
> struct mptcp_pm_add_entry *
> -- 
> 2.31.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail
  2021-05-06  6:39     ` [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
@ 2021-05-08  0:54       ` Mat Martineau
  2021-05-26 16:08       ` [RESEND] " Matthieu Baerts
  1 sibling, 0 replies; 20+ messages in thread
From: Mat Martineau @ 2021-05-08  0:54 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Thu, 6 May 2021, Geliang Tang wrote:

> When a bad checksum is detected, send out the MP_FAIL suboption.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---
> net/mptcp/subflow.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 659b8842ae3b..efd84ff61015 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -906,6 +906,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
> 	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
> 	if (unlikely(csum_fold(csum))) {
> 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCSUMERR);
> +		subflow->send_mp_fail = 1;
> +		subflow->fail_seq = subflow->map_seq;

map_seq is the sequence number of the last good data *received* by this 
peer, but fail_seq will be used to discard data *sent* by this peer. So I 
think fail_seq needs to be based on the sequence number for outgoing data 
on this subflow? I admit the RFC has me confused here. If MP_FAIL is 
implemented on the multipath-tcp.org kernel it might be helpful to see how 
it is used there.


> 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
> 	}
>
> -- 
> 2.31.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [RESEND] [PATCH 0/8] Please ignore: resending some patches for patchwork.kernel.org
@ 2021-05-26 16:08 Matthieu Baerts
  2020-09-24 14:35 ` [MPTCP] [RFC PATCH 2/4] tcp: move selected mptcp helpers to tcp.h/mptcp.h Florian Westphal
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Matthieu Baerts

As discussed as at previous weekly meetings, we decided to move from
patchwork.ozlabs.org to patchwork.kernel.org for various reasons:

  https://lore.kernel.org/mptcp/84426992-d161-ce7a-28c2-c578f521a96a@tessares.net/T/#u
  https://lore.kernel.org/mptcp/a2ee322e-3a55-f392-c697-d96d8054937d@tessares.net/T/#u

Some "old" patches needs to be re-sent to be able to track them on the new
instance.

Here they are, unmodified.

Please ignore them! (except if some are no longer relevant and can be dropped)

Cheers,
Matt
-- 
2.31.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [RESEND] [MPTCP] [RFC PATCH 2/4] tcp: move selected mptcp helpers to tcp.h/mptcp.h
  2020-09-24 14:35 ` [MPTCP] [RFC PATCH 2/4] tcp: move selected mptcp helpers to tcp.h/mptcp.h Florian Westphal
@ 2021-05-26 16:08   ` Matthieu Baerts
  0 siblings, 0 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

From: Florian Westphal <fw@strlen.de>

Will be needed to fill in the MPTCP reset option from tcp_ipv4/ipv6.c.

It would make more sense to place mptcp_option() in mptcp.h, but
TCPOPT_MPTCP is defined in tcp.h, and mptcp.h is included from tcp.h,
not the other way around.  Placing the helper in mptcp.h thus results
in a build failure because TCPOPT_MPTCP is not defined.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/mptcp.h  | 10 ++++++++++
 include/net/tcp.h    |  5 +++++
 net/mptcp/protocol.h | 11 -----------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6e706d838e4e..5f5062580e0e 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -68,6 +68,11 @@ static inline bool sk_is_mptcp(const struct sock *sk)
 	return tcp_sk(sk)->is_mptcp;
 }
 
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
+}
+
 static inline bool rsk_is_mptcp(const struct request_sock *req)
 {
 	return tcp_rsk(req)->is_mptcp;
@@ -153,6 +158,11 @@ static inline void mptcp_init(void)
 {
 }
 
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+	return NULL;
+}
+
 static inline bool sk_is_mptcp(const struct sock *sk)
 {
 	return false;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 852f0d71dd40..ea8c134802e8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2406,4 +2406,9 @@ static inline u64 tcp_transmit_time(const struct sock *sk)
 	return 0;
 }
 
+static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
+{
+	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
+		     ((nib & 0xF) << 8) | field);
+}
 #endif	/* _TCP_H */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7cfe52aeb2b8..e8c873c66182 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -130,12 +130,6 @@ struct mptcp_options_received {
 	u16	port;
 };
 
-static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
-{
-	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
-		     ((nib & 0xF) << 8) | field);
-}
-
 struct mptcp_addr_info {
 	sa_family_t		family;
 	__be16			port;
@@ -486,11 +480,6 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
 void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 
-static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
-{
-	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
-}
-
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
 	return (__s64)(seq1 - seq2) < 0;

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RESEND] [MPTCP] [RFC PATCH 4/4] tcp: parse tcp options contained in reset packets
  2020-09-24 14:35 ` [MPTCP] [RFC PATCH 4/4] tcp: parse tcp options contained in reset packets Florian Westphal
@ 2021-05-26 16:08   ` Matthieu Baerts
  0 siblings, 0 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

From: Florian Westphal <fw@strlen.de>

This will be used to handle MPTCP_TCPRST suboption.

It allows an MPTCP receiver to learn more information when a subflow is
re-set.  The MPTCP_TCPRST option gives an error code (protocol error,
path too slow, middlebox interference detected, and so on).

This allows an MPTCP receiver to make a decision to reopen the subflow
at a later time, or even completely disable the path.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/tcp.h        |  2 +-
 net/ipv4/tcp_input.c     | 13 ++++++++-----
 net/ipv4/tcp_minisocks.c |  2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a981b5d60112..92eee154e2a3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -609,7 +609,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 /* tcp_input.c */
 void tcp_rearm_rto(struct sock *sk);
 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
-void tcp_reset(struct sock *sk);
+void tcp_reset(struct sock *sk, struct sk_buff *skb);
 void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
 void tcp_fin(struct sock *sk);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8afa4af30fdc..0a10ba1df1a0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4206,10 +4206,13 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 }
 
 /* When we get a reset we do this. */
-void tcp_reset(struct sock *sk)
+void tcp_reset(struct sock *sk, struct sk_buff *skb)
 {
 	trace_tcp_receive_reset(sk);
 
+	if (sk_is_mptcp(sk))
+		mptcp_incoming_options(sk, skb);
+
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
 	case TCP_SYN_SENT:
@@ -5590,7 +5593,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 						  &tp->last_oow_ack_time))
 				tcp_send_dupack(sk, skb);
 		} else if (tcp_reset_check(sk, skb)) {
-			tcp_reset(sk);
+			tcp_reset(sk, skb);
 		}
 		goto discard;
 	}
@@ -5626,7 +5629,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 		}
 
 		if (rst_seq_match)
-			tcp_reset(sk);
+			tcp_reset(sk, skb);
 		else {
 			/* Disable TFO if RST is out-of-order
 			 * and no data has been received
@@ -6059,7 +6062,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 
 		if (th->rst) {
-			tcp_reset(sk);
+			tcp_reset(sk, skb);
 			goto discard;
 		}
 
@@ -6501,7 +6504,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
 				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
-				tcp_reset(sk);
+				tcp_reset(sk, skb);
 				return 1;
 			}
 		}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 56c306e3cd2f..12f2495f98df 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -802,7 +802,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		req->rsk_ops->send_reset(sk, skb);
 	} else if (fastopen) { /* received a valid RST pkt */
 		reqsk_fastopen_remove(sk, req, true);
-		tcp_reset(sk);
+		tcp_reset(sk, skb);
 	}
 	if (!fastopen) {
 		inet_csk_reqsk_queue_drop(sk, req);

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RESEND] [MPTCP] [RFC mptpcp-next] mptcp: add ooo prune support
  2020-10-02 15:45 ` [MPTCP] [RFC mptpcp-next] mptcp: add ooo prune support Florian Westphal
@ 2021-05-26 16:08   ` Matthieu Baerts
  0 siblings, 0 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

From: Florian Westphal <fw@strlen.de>

It might be possible that entire receive buffer is occupied by
skbs in the OOO queue.

In this case we can't pull more skbs from subflows and the holes
will never be filled.

If this happens, schedule the work queue and prune ~12% of skbs to
make space available. Also add a MIB counter for this.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 Paolo, this does relate a bit to our discussion wrt. oow
 tracking.  I thought we might need to add some sort of cushion to
 account for window discrepancies, but that might then get us
 in a state where wmem might be full...

 What do you think?

 I did NOT see such a problem in practice, this is a theoretical "fix".
 TCP has similar code to deal with corner cases of small-oow packets.

 net/mptcp/mib.c      |  1 +
 net/mptcp/mib.h      |  1 +
 net/mptcp/protocol.c | 48 ++++++++++++++++++++++++++++++++++++++++++--
 net/mptcp/protocol.h |  1 +
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 84d119436b22..65c575e3af60 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
 	SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
 	SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
+	SNMP_MIB_ITEM("OFOPrune", MPTCP_MIB_OFOPRUNE),
 	SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW),
 	SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
 	SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 47bcecce1106..75a7fb3a87db 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_OFOQUEUETAIL,	/* Segments inserted into OoO queue tail */
 	MPTCP_MIB_OFOQUEUE,		/* Segments inserted into OoO queue */
 	MPTCP_MIB_OFOMERGE,		/* Segments merged in OoO queue */
+	MPTCP_MIB_OFOPRUNE,		/* Segments pruned from OoO queue */
 	MPTCP_MIB_NODSSWINDOW,		/* Segments not in MPTCP windows */
 	MPTCP_MIB_DUPDATA,		/* Segments discarded due to duplicate DSS */
 	MPTCP_MIB_ADDADDR,		/* Received ADD_ADDR with echo-flag=0 */
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 79cd8e879c10..4cc30a3d426c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -658,8 +658,17 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 		sk_rbuf = ssk_rbuf;
 
 	/* over limit? can't append more skbs to msk */
-	if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
-		goto wake;
+	if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) {
+		if (likely(!skb_queue_empty(&sk->sk_receive_queue)))
+			goto wake;
+
+		/* Entire recvbuf occupied by OOO skbs? Prune time. */
+		if (!test_and_set_bit(MPTCP_WORK_PRUNE_OFO, &msk->flags) &&
+		     schedule_work(&msk->work))
+			sock_hold(sk);
+
+		return;
+	}
 
 	if (move_skbs_to_msk(msk, ssk))
 		goto wake;
@@ -1797,6 +1806,38 @@ static bool mptcp_check_close_timeout(const struct sock *sk)
 	return true;
 }
 
+static void mptcp_prune_ofo(struct mptcp_sock *msk)
+{
+	struct sock *sk = &msk->sk.icsk_inet.sk;
+	struct sk_buff *skb, *prev = NULL;
+	int goal;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	    atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return;
+
+	if (WARN_ON_ONCE(RB_EMPTY_ROOT(&msk->out_of_order_queue)))
+		return;
+
+	MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOPRUNE);
+
+	goal = READ_ONCE(sk->sk_rcvbuf) >> 3;
+	skb = msk->ooo_last_skb;
+
+	while (skb) {
+		prev = skb_rb_prev(skb);
+		rb_erase(&skb->rbnode, &msk->out_of_order_queue);
+		goal -= skb->truesize;
+		mptcp_drop(sk, skb);
+
+		if (goal <= 0)
+			break;
+		skb = prev;
+	}
+
+	msk->ooo_last_skb = prev;
+}
+
 static void mptcp_worker(struct work_struct *work)
 {
 	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -1819,6 +1860,9 @@ static void mptcp_worker(struct work_struct *work)
 	if (mptcp_send_head(sk))
 		mptcp_push_pending(sk, 0);
 
+	if (test_and_clear_bit(MPTCP_WORK_PRUNE_OFO, &msk->flags))
+		mptcp_prune_ofo(msk);
+
 	if (msk->pm.status)
 		pm_work(msk);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d33e9676a1a3..360441fdaa93 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -91,6 +91,7 @@
 #define MPTCP_WORK_EOF		3
 #define MPTCP_FALLBACK_DONE	4
 #define MPTCP_WORKER_RUNNING	5
+#define MPTCP_WORK_PRUNE_OFO	6
 
 static inline bool before64(__u64 seq1, __u64 seq2)
 {

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RESEND] [MPTCP] [PATCH MPTCP 1/5] tcp: make two mptcp helpers available to tcp stack
  2020-11-05 17:01 ` [MPTCP] [PATCH MPTCP 1/5] tcp: make two mptcp helpers available to tcp stack Florian Westphal
@ 2021-05-26 16:08   ` Matthieu Baerts
  0 siblings, 0 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

From: Florian Westphal <fw@strlen.de>

needed by followup patches to add mptcp reset (and fastclose)
options to tcp reset packets.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/mptcp.h  | 10 ++++++++++
 include/net/tcp.h    |  5 +++++
 net/mptcp/protocol.h | 11 -----------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index b6cf07143a8a..3d57607982fa 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -78,6 +78,11 @@ static inline bool rsk_drop_req(const struct request_sock *req)
 	return tcp_rsk(req)->is_mptcp && tcp_rsk(req)->drop_req;
 }
 
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
+}
+
 void mptcp_space(const struct sock *ssk, int *space, int *full_space);
 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
 		       unsigned int *size, struct mptcp_out_options *opts);
@@ -169,6 +174,11 @@ static inline bool rsk_drop_req(const struct request_sock *req)
 	return false;
 }
 
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+	return NULL;
+}
+
 static inline void mptcp_parse_option(const struct sk_buff *skb,
 				      const unsigned char *ptr, int opsize,
 				      struct tcp_options_received *opt_rx)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f3d42cb626fc..8115164e0df6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2377,4 +2377,9 @@ static inline u64 tcp_transmit_time(const struct sock *sk)
 	return 0;
 }
 
+static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
+{
+	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
+		     ((nib & 0xF) << 8) | field);
+}
 #endif	/* _TCP_H */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d29c6a4749eb..66bd4d096753 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -139,12 +139,6 @@ struct mptcp_options_received {
 	u16	port;
 };
 
-static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
-{
-	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
-		     ((nib & 0xF) << 8) | field);
-}
-
 struct mptcp_addr_info {
 	sa_family_t		family;
 	__be16			port;
@@ -568,11 +562,6 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
 void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 
-static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
-{
-	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
-}
-
 void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
 
 static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RESEND] [MPTCP] [PATCH MPTCP 5/5] mptcp: send fastclose if userspace closes socket with unread data
  2020-11-05 17:01 ` [MPTCP] [PATCH MPTCP 5/5] mptcp: send fastclose if userspace closes socket with unread data Florian Westphal
@ 2021-05-26 16:08   ` Matthieu Baerts
  0 siblings, 0 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

From: Florian Westphal <fw@strlen.de>

Add building & sending of FASTCLOSE option.
RFC 8684 describes two methods:

A): Host sends an ACK containing the MP_FASTCLOSE
    option on one subflow [..] On all the other subflows,
    Host A sends a regular TCP RST to close these subflows and tears
    them down. [..]

R): Host A sends a RST containing the MP_FASTCLOSE option on all
    subflows [..].  Host A can tear down the subflows and the
    connection immediately.

This implements option R) only: All subflows are re-set with FASTCLOSE.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/options.c  | 35 +++++++++++++++++++++++++++++++++++
 net/mptcp/protocol.c | 32 +++++++++++++++++++++++++++++++-
 net/mptcp/protocol.h |  1 +
 3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 0a940687f738..08b60d527de0 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -681,6 +681,31 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
 	return true;
 }
 
+static bool mptcp_fastclose(const struct mptcp_sock *msk)
+{
+	return READ_ONCE(msk->snd_fastclose);
+}
+
+static bool mptcp_established_options_fastclose(struct sock *sk,
+						unsigned int *size,
+						unsigned int remaining,
+						struct mptcp_out_options *opts)
+{
+	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	if (likely(!mptcp_fastclose(mptcp_sk(subflow->conn))))
+		return false;
+
+	if (remaining < TCPOLEN_MPTCP_FASTCLOSE)
+		return false;
+
+	*size = TCPOLEN_MPTCP_FASTCLOSE;
+	opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
+	opts->rcvr_key = subflow->remote_key;
+
+	return true;
+}
+
 static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
 						   unsigned int *size,
 						   unsigned int remaining,
@@ -691,6 +716,9 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
 	if (remaining < TCPOLEN_MPTCP_RST)
 		return;
 
+	if (mptcp_established_options_fastclose(sk, size, remaining, opts))
+		return;
+
 	*size = TCPOLEN_MPTCP_RST;
 	opts->suboptions |= OPTION_MPTCP_RST;
 	opts->reset_transient = subflow->reset_transient;
@@ -1179,6 +1207,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 		ptr += 5;
 	}
 
+	if (OPTION_MPTCP_FASTCLOSE & opts->suboptions) {
+		*ptr++ = mptcp_option(MPTCPOPT_MP_FASTCLOSE,
+				      TCPOLEN_MPTCP_FASTCLOSE, 0, 0);
+		put_unaligned_be64(opts->rcvr_key, ptr);
+		ptr += 2;
+	}
+
 	if (OPTION_MPTCP_RST & opts->suboptions)
 		*ptr++ = mptcp_option(MPTCPOPT_RST,
 				      TCPOLEN_MPTCP_RST,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7e9705943813..6b6efa00cad5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2133,6 +2133,29 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
 	}
 }
 
+static void __mptcp_send_fastclose(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow, *tmp;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	WRITE_ONCE(msk->snd_fastclose, true);
+
+	__mptcp_flush_join_list(msk);
+	__mptcp_clear_xmit(sk);
+
+	WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+
+	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+
+		lock_sock(tcp_sk);
+		subflow->reset_transient = 0;
+		subflow->reset_reason = MPTCP_RST_EMPTCP;
+		mptcp_subflow_reset(tcp_sk);
+		release_sock(tcp_sk);
+	}
+}
+
 static void __mptcp_wr_shutdown(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2185,6 +2208,7 @@ static void mptcp_close(struct sock *sk, long timeout)
 {
 	struct mptcp_subflow_context *subflow;
 	bool do_cancel_work = false;
+	bool send_fin = false;
 
 	lock_sock(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
@@ -2197,7 +2221,13 @@ static void mptcp_close(struct sock *sk, long timeout)
 		goto cleanup;
 	}
 
-	if (mptcp_close_state(sk))
+	send_fin = mptcp_close_state(sk);
+	if (!skb_queue_empty(&sk->sk_receive_queue)) {
+		__mptcp_send_fastclose(sk);
+		send_fin = false;
+	}
+
+	if (send_fin)
 		__mptcp_wr_shutdown(sk);
 
 	sk_stream_wait_close(sk, timeout);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d4c99e091cb9..93352044bff9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -243,6 +243,7 @@ struct mptcp_sock {
 	bool		fully_established;
 	bool		rcv_data_fin;
 	bool		snd_data_fin_enable;
+	bool		snd_fastclose;
 	bool		rcv_fastclose;
 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
 	spinlock_t	join_list_lock;

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RESEND] [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending
  2021-05-06  6:39 ` [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending Geliang Tang
  2021-05-06  6:39   ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Geliang Tang
@ 2021-05-26 16:08   ` Matthieu Baerts
  1 sibling, 0 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

From: Geliang Tang <geliangtang@gmail.com>

This patch added the MP_FAIL suboption sending support.

Add a new flag named send_mp_fail in struct mptcp_subflow_context. If
this flag is set, send out MP_FAIL suboption.

Add a new member fail_seq in struct mptcp_out_options to save the data
sequence number to put into the MP_FAIL suboption.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 include/net/mptcp.h  |  1 +
 net/mptcp/options.c  | 46 +++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/protocol.h |  4 ++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index cb580b06152f..f48d3b5a3fd4 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -72,6 +72,7 @@ struct mptcp_out_options {
 	u32 nonce;
 	u64 thmac;
 	u32 token;
+	u64 fail_seq;
 	u8 hmac[20];
 	struct mptcp_ext ext_copy;
 #endif
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4179287bd647..485c5a77e71b 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -777,6 +777,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
 	opts->reset_reason = subflow->reset_reason;
 }
 
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+					      unsigned int *size,
+					      unsigned int remaining,
+					      struct mptcp_out_options *opts)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	if (!subflow->send_mp_fail)
+		return false;
+
+	if (remaining < TCPOLEN_MPTCP_FAIL)
+		return false;
+
+	*size = TCPOLEN_MPTCP_FAIL;
+	opts->suboptions |= OPTION_MPTCP_FAIL;
+	opts->fail_seq = subflow->fail_seq;
+
+	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+	return true;
+}
+
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 			       unsigned int *size, unsigned int remaining,
 			       struct mptcp_out_options *opts)
@@ -792,8 +814,16 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 	if (unlikely(__mptcp_check_fallback(msk)))
 		return false;
 
+	if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+		*size += opt_size;
+		remaining -= opt_size;
+		ret = true;
+	}
+
 	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
-		mptcp_established_options_rst(sk, skb, size, remaining, opts);
+		mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
+		*size += opt_size;
+		remaining -= opt_size;
 		return true;
 	}
 
@@ -1338,6 +1368,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 				      opts->backup, TCPOPT_NOP);
 	}
 
+	if (OPTION_MPTCP_FAIL & opts->suboptions) {
+		const struct sock *ssk = (const struct sock *)tp;
+		struct mptcp_subflow_context *subflow;
+
+		subflow = mptcp_subflow_ctx(ssk);
+		subflow->send_mp_fail = 0;
+
+		*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+				      TCPOLEN_MPTCP_FAIL,
+				      0, 0);
+		put_unaligned_be64(opts->fail_seq, ptr);
+		ptr += 2;
+	}
+
 	if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
 		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
 				      TCPOLEN_MPTCP_MPJ_SYN,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 41baa2ffc9a9..ff70b3e97dd0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -26,6 +26,7 @@
 #define OPTION_MPTCP_FASTCLOSE	BIT(8)
 #define OPTION_MPTCP_PRIO	BIT(9)
 #define OPTION_MPTCP_RST	BIT(10)
+#define OPTION_MPTCP_FAIL	BIT(11)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE	0
@@ -67,6 +68,7 @@
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
 #define TCPOLEN_MPTCP_FASTCLOSE		12
 #define TCPOLEN_MPTCP_RST		4
+#define TCPOLEN_MPTCP_FAIL		12
 
 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
 
@@ -419,6 +421,7 @@ struct mptcp_subflow_context {
 		mpc_map : 1,
 		backup : 1,
 		send_mp_prio : 1,
+		send_mp_fail : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
 		disposable : 1;	    /* ctx can be free at ulp release time */
@@ -433,6 +436,7 @@ struct mptcp_subflow_context {
 	u8	reset_seen:1;
 	u8	reset_transient:1;
 	u8	reset_reason:4;
+	u64	fail_seq;
 
 	long	delegated_status;
 	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RESEND] [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving
  2021-05-06  6:39   ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Geliang Tang
  2021-05-06  6:39     ` [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
  2021-05-08  0:44     ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Mat Martineau
@ 2021-05-26 16:08     ` Matthieu Baerts
  2 siblings, 0 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

From: Geliang Tang <geliangtang@gmail.com>

This patch added handling for receiving MP_FAIL suboption.

Add a new members mp_fail and fail_seq in struct mptcp_options_received.
When MP_FAIL suboption is received, set mp_fail to 1 and save the sequence
number to fail_seq.

Then invoke mptcp_pm_mp_fail_received to deal with the MP_FAIL suboption.
In it, send MP_FAIL + RST and fallback.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/options.c  | 16 ++++++++++++++++
 net/mptcp/pm.c       | 17 +++++++++++++++++
 net/mptcp/protocol.h |  3 +++
 3 files changed, 36 insertions(+)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 485c5a77e71b..9795e3ccf6cc 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -341,6 +341,16 @@ static void mptcp_parse_option(const struct sock *sk,
 		mp_opt->reset_reason = *ptr;
 		break;
 
+	case MPTCPOPT_MP_FAIL:
+		if (opsize != TCPOLEN_MPTCP_FAIL)
+			break;
+
+		ptr += 2;
+		mp_opt->mp_fail = 1;
+		mp_opt->fail_seq = get_unaligned_be64(ptr);
+		pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
+		break;
+
 	default:
 		break;
 	}
@@ -367,6 +377,7 @@ void mptcp_get_options(const struct sock *sk,
 	mp_opt->reset = 0;
 	mp_opt->csum_reqd = 0;
 	mp_opt->deny_join_id0 = 0;
+	mp_opt->mp_fail = 0;
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
@@ -1127,6 +1138,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		mp_opt.mp_prio = 0;
 	}
 
+	if (mp_opt.mp_fail) {
+		mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+		mp_opt.mp_fail = 0;
+	}
+
 	if (mp_opt.reset) {
 		subflow->reset_seen = 1;
 		subflow->reset_reason = mp_opt.reset_reason;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 639271e09604..87152a2bcbc4 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -247,6 +247,23 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
 	mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
 }
 
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+	pr_debug("map_seq=%llu fail_seq=%llu %llu", subflow->map_seq, subflow->fail_seq, fail_seq);
+
+	if (subflow->fail_seq != fail_seq) {
+		subflow->send_mp_fail = 1;
+		subflow->fail_seq = fail_seq;
+		mptcp_subflow_reset(sk);
+	}
+
+	pr_fallback(msk);
+	__mptcp_do_fallback(msk);
+}
+
 /* path manager helpers */
 
 bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ff70b3e97dd0..fef6adef3c99 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -138,6 +138,7 @@ struct mptcp_options_received {
 		add_addr : 1,
 		rm_addr : 1,
 		mp_prio : 1,
+		mp_fail : 1,
 		echo : 1,
 		csum_reqd : 1,
 		backup : 1,
@@ -159,6 +160,7 @@ struct mptcp_options_received {
 	u64	ahmac;
 	u8	reset_reason:4;
 	u8	reset_transient:1;
+	u64	fail_seq;
 };
 
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -697,6 +699,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 				 struct mptcp_addr_info *addr,
 				 u8 bkup);
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
 void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
 struct mptcp_pm_add_entry *

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RESEND] [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail
  2021-05-06  6:39     ` [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
  2021-05-08  0:54       ` Mat Martineau
@ 2021-05-26 16:08       ` Matthieu Baerts
  1 sibling, 0 replies; 20+ messages in thread
From: Matthieu Baerts @ 2021-05-26 16:08 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

From: Geliang Tang <geliangtang@gmail.com>

When a bad checksum is detected, send out the MP_FAIL suboption.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/subflow.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 659b8842ae3b..efd84ff61015 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -906,6 +906,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
 	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
 	if (unlikely(csum_fold(csum))) {
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCSUMERR);
+		subflow->send_mp_fail = 1;
+		subflow->fail_seq = subflow->map_seq;
 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
 	}
 

^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-05-26 16:08 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-26 16:08 [RESEND] [PATCH 0/8] Please ignore: resending some patches for patchwork.kernel.org Matthieu Baerts
2020-09-24 14:35 ` [MPTCP] [RFC PATCH 2/4] tcp: move selected mptcp helpers to tcp.h/mptcp.h Florian Westphal
2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
2020-09-24 14:35 ` [MPTCP] [RFC PATCH 4/4] tcp: parse tcp options contained in reset packets Florian Westphal
2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
2020-10-02 15:45 ` [MPTCP] [RFC mptpcp-next] mptcp: add ooo prune support Florian Westphal
2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
2020-11-05 17:01 ` [MPTCP] [PATCH MPTCP 1/5] tcp: make two mptcp helpers available to tcp stack Florian Westphal
2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
2020-11-05 17:01 ` [MPTCP] [PATCH MPTCP 5/5] mptcp: send fastclose if userspace closes socket with unread data Florian Westphal
2021-05-26 16:08   ` [RESEND] " Matthieu Baerts
  -- strict thread matches above, loose matches on Subject: below --
2021-05-06  6:39 [MPTCP][PATCH mptcp-next 0/3] MP_FAIL support Geliang Tang
2021-05-06  6:39 ` [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending Geliang Tang
2021-05-06  6:39   ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Geliang Tang
2021-05-06  6:39     ` [MPTCP][PATCH mptcp-next 3/3] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
2021-05-08  0:54       ` Mat Martineau
2021-05-26 16:08       ` [RESEND] " Matthieu Baerts
2021-05-08  0:44     ` [MPTCP][PATCH mptcp-next 2/3] mptcp: MP_FAIL suboption receiving Mat Martineau
2021-05-26 16:08     ` [RESEND] " Matthieu Baerts
2021-05-26 16:08   ` [RESEND] [MPTCP][PATCH mptcp-next 1/3] mptcp: MP_FAIL suboption sending Matthieu Baerts

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).