[MPTCP][PATCH v3 mptcp-next 0/8] MP

mptcp.lists.linux.dev archive mirror
 help / color / mirror / Atom feed

* [MPTCP][PATCH v3 mptcp-next 0/8] MP_FAIL support
@ 2021-06-28  4:28 Geliang Tang
  2021-06-28  4:28 ` [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending Geliang Tang
  0 siblings, 1 reply; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:28 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

v3:
 - respond with MP_FAIL
 - add single subflow check
 - add infinite mapping sending and receiving
 - export/20210626T054902

v2:
 - MP_FAIL logic:
   * Peer B send a DSS to peer A, and the data has been modify by the
  middleboxes, then peer A detects the bad checksum.
   * In the multiple subflows case, peer A sends MP_FAIL+RST back to peer B,
  and peer A discards the data following the bad data sequence number. Peer
  B receives this MP_FAIL+RST, and close this subflow.
   * In the single subflow case, using the simple implementation, peer A
  sends MP_FAIL back to peer B, and peer A fallback to a regular TCP. Peer
  B receives this MP_FAIL, and fallback to a regular TCP.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52

Geliang Tang (8):
  mptcp: MP_FAIL suboption sending
  mptcp: MP_FAIL suboption receiving
  mptcp: send out MP_FAIL when data checksum fail
  mptcp: add the mibs for MP_FAIL
  selftests: mptcp: add MP_FAIL mibs check
  mptcp: infinite mapping sending
  mptcp: infinite mapping receiving
  mptcp: add a mib for the infinite mapping sending

 include/net/mptcp.h                           |  1 +
 net/mptcp/mib.c                               |  3 +
 net/mptcp/mib.h                               |  3 +
 net/mptcp/options.c                           | 96 ++++++++++++++++++-
 net/mptcp/pm.c                                | 20 ++++
 net/mptcp/protocol.c                          |  1 +
 net/mptcp/protocol.h                          | 28 ++++++
 net/mptcp/subflow.c                           | 28 ++++++
 .../testing/selftests/net/mptcp/mptcp_join.sh | 38 ++++++++
 9 files changed, 215 insertions(+), 3 deletions(-)

-- 
2.31.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending
  2021-06-28  4:28 [MPTCP][PATCH v3 mptcp-next 0/8] MP_FAIL support Geliang Tang
@ 2021-06-28  4:28 ` Geliang Tang
  2021-06-28  4:28   ` [MPTCP][PATCH v3 mptcp-next 2/8] mptcp: MP_FAIL suboption receiving Geliang Tang
                     ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:28 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added the MP_FAIL suboption sending support.

Add a new flag named send_mp_fail in struct mptcp_subflow_context. If
this flag is set, send out MP_FAIL suboption.

Add a new member fail_seq in struct mptcp_out_options to save the data
sequence number to put into the MP_FAIL suboption.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 include/net/mptcp.h  |  1 +
 net/mptcp/options.c  | 54 ++++++++++++++++++++++++++++++++++++++++++--
 net/mptcp/protocol.h |  4 ++++
 3 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index cb580b06152f..f48d3b5a3fd4 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -72,6 +72,7 @@ struct mptcp_out_options {
 	u32 nonce;
 	u64 thmac;
 	u32 token;
+	u64 fail_seq;
 	u8 hmac[20];
 	struct mptcp_ext ext_copy;
 #endif
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b5850afea343..b78defe1aed9 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -771,6 +771,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
 	opts->reset_reason = subflow->reset_reason;
 }
 
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+					      unsigned int *size,
+					      unsigned int remaining,
+					      struct mptcp_out_options *opts)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	if (!subflow->send_mp_fail)
+		return false;
+
+	if (remaining < TCPOLEN_MPTCP_FAIL)
+		return false;
+
+	*size = TCPOLEN_MPTCP_FAIL;
+	opts->suboptions |= OPTION_MPTCP_FAIL;
+	opts->fail_seq = subflow->fail_seq;
+
+	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+	return true;
+}
+
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 			       unsigned int *size, unsigned int remaining,
 			       struct mptcp_out_options *opts)
@@ -787,15 +809,29 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 		return false;
 
 	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
-		mptcp_established_options_rst(sk, skb, size, remaining, opts);
+		if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+			*size += opt_size;
+			remaining -= opt_size;
+		}
+		mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
+		*size += opt_size;
+		remaining -= opt_size;
 		return true;
 	}
 
 	snd_data_fin = mptcp_data_fin_enabled(msk);
 	if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
 		ret = true;
-	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
 		ret = true;
+		if (opts->ext_copy.use_ack) {
+			if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+				*size += opt_size;
+				remaining -= opt_size;
+				ret = true;
+			}
+		}
+	}
 
 	/* we reserved enough space for the above options, and exceeding the
 	 * TCP option space would be fatal
@@ -1334,6 +1370,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 				      opts->backup, TCPOPT_NOP);
 	}
 
+	if (OPTION_MPTCP_FAIL & opts->suboptions) {
+		const struct sock *ssk = (const struct sock *)tp;
+		struct mptcp_subflow_context *subflow;
+
+		subflow = mptcp_subflow_ctx(ssk);
+		subflow->send_mp_fail = 0;
+
+		*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+				      TCPOLEN_MPTCP_FAIL,
+				      0, 0);
+		put_unaligned_be64(opts->fail_seq, ptr);
+		ptr += 2;
+	}
+
 	if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
 		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
 				      TCPOLEN_MPTCP_MPJ_SYN,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 426ed80fe72f..007af5e4ba3d 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -26,6 +26,7 @@
 #define OPTION_MPTCP_FASTCLOSE	BIT(8)
 #define OPTION_MPTCP_PRIO	BIT(9)
 #define OPTION_MPTCP_RST	BIT(10)
+#define OPTION_MPTCP_FAIL	BIT(11)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE	0
@@ -67,6 +68,7 @@
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
 #define TCPOLEN_MPTCP_FASTCLOSE		12
 #define TCPOLEN_MPTCP_RST		4
+#define TCPOLEN_MPTCP_FAIL		12
 
 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
 
@@ -417,6 +419,7 @@ struct mptcp_subflow_context {
 		mpc_map : 1,
 		backup : 1,
 		send_mp_prio : 1,
+		send_mp_fail : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
 		disposable : 1;	    /* ctx can be free at ulp release time */
@@ -431,6 +434,7 @@ struct mptcp_subflow_context {
 	u8	reset_seen:1;
 	u8	reset_transient:1;
 	u8	reset_reason:4;
+	u64	fail_seq;
 
 	long	delegated_status;
 	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [MPTCP][PATCH v3 mptcp-next 2/8] mptcp: MP_FAIL suboption receiving
  2021-06-28  4:28 ` [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending Geliang Tang
@ 2021-06-28  4:28   ` Geliang Tang
  2021-06-28  4:28     ` [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
  2021-07-07 23:07   ` [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending Mat Martineau
  2021-07-14  8:45   ` Paolo Abeni
  2 siblings, 1 reply; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:28 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added handling for receiving MP_FAIL suboption.

Add a new members mp_fail and fail_seq in struct mptcp_options_received.
When MP_FAIL suboption is received, set mp_fail to 1 and save the sequence
number to fail_seq.

Then invoke mptcp_pm_mp_fail_received to deal with the MP_FAIL suboption.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/options.c  | 16 ++++++++++++++++
 net/mptcp/pm.c       |  7 +++++++
 net/mptcp/protocol.h |  3 +++
 3 files changed, 26 insertions(+)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b78defe1aed9..9e6ab6a3d425 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -336,6 +336,16 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		mp_opt->reset_reason = *ptr;
 		break;
 
+	case MPTCPOPT_MP_FAIL:
+		if (opsize != TCPOLEN_MPTCP_FAIL)
+			break;
+
+		ptr += 2;
+		mp_opt->mp_fail = 1;
+		mp_opt->fail_seq = get_unaligned_be64(ptr);
+		pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
+		break;
+
 	default:
 		break;
 	}
@@ -364,6 +374,7 @@ void mptcp_get_options(const struct sock *sk,
 	mp_opt->reset = 0;
 	mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
 	mp_opt->deny_join_id0 = 0;
+	mp_opt->mp_fail = 0;
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
@@ -1129,6 +1140,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		mp_opt.mp_prio = 0;
 	}
 
+	if (mp_opt.mp_fail) {
+		mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+		mp_opt.mp_fail = 0;
+	}
+
 	if (mp_opt.reset) {
 		subflow->reset_seen = 1;
 		subflow->reset_reason = mp_opt.reset_reason;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 639271e09604..d4c19420194a 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -247,6 +247,13 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
 	mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
 }
 
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	pr_debug("map_seq=%llu fail_seq=%llu", subflow->map_seq, fail_seq);
+}
+
 /* path manager helpers */
 
 bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 007af5e4ba3d..8e050575a2d9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -139,6 +139,7 @@ struct mptcp_options_received {
 		add_addr : 1,
 		rm_addr : 1,
 		mp_prio : 1,
+		mp_fail : 1,
 		echo : 1,
 		csum_reqd : 1,
 		backup : 1,
@@ -160,6 +161,7 @@ struct mptcp_options_received {
 	u64	ahmac;
 	u8	reset_reason:4;
 	u8	reset_transient:1;
+	u64	fail_seq;
 };
 
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -704,6 +706,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 				 struct mptcp_addr_info *addr,
 				 u8 bkup);
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
 void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
 struct mptcp_pm_add_entry *
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail
  2021-06-28  4:28   ` [MPTCP][PATCH v3 mptcp-next 2/8] mptcp: MP_FAIL suboption receiving Geliang Tang
@ 2021-06-28  4:28     ` Geliang Tang
  2021-06-28  4:29       ` [MPTCP][PATCH v3 mptcp-next 4/8] mptcp: add the mibs for MP_FAIL Geliang Tang
  2021-07-07 23:20       ` [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail Mat Martineau
  0 siblings, 2 replies; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:28 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

When a bad checksum is detected, send out the MP_FAIL option.

When multiple subflows are in use, close the affected subflow with a RST
that includes an MP_FAIL option.

When a single subfow is in use, send back an MP_FAIL option on the
subflow-level ACK. And the receiver of this MP_FAIL respond with an
MP_FAIL in the reverse direction.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/pm.c       | 10 ++++++++++
 net/mptcp/protocol.h | 14 ++++++++++++++
 net/mptcp/subflow.c  | 18 ++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index d4c19420194a..c34c9c0b2fa5 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -250,8 +250,18 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
 void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 
 	pr_debug("map_seq=%llu fail_seq=%llu", subflow->map_seq, fail_seq);
+
+	if (!msk->pm.subflows) {
+		if (!subflow->mp_fail_need_echo) {
+			subflow->send_mp_fail = 1;
+			subflow->fail_seq = fail_seq;
+		} else {
+			subflow->mp_fail_need_echo = 0;
+		}
+	}
 }
 
 /* path manager helpers */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 8e050575a2d9..7a49454c77a6 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -422,6 +422,7 @@ struct mptcp_subflow_context {
 		backup : 1,
 		send_mp_prio : 1,
 		send_mp_fail : 1,
+		mp_fail_need_echo : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
 		disposable : 1;	    /* ctx can be free at ulp release time */
@@ -594,6 +595,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
 	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
 }
 
+static inline bool mptcp_has_another_subflow_established(struct sock *ssk)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+	mptcp_for_each_subflow(msk, tmp) {
+		if (tmp->fully_established && tmp != subflow)
+			return true;
+	}
+
+	return false;
+}
+
 void __init mptcp_proto_init(void);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 int __init mptcp_proto_v6_init(void);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 0b5d4a3eadcd..46302208c474 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -913,6 +913,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
 	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
 	if (unlikely(csum_fold(csum))) {
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+		subflow->send_mp_fail = 1;
+		subflow->fail_seq = subflow->map_seq;
 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
 	}
 
@@ -1160,6 +1162,22 @@ static bool subflow_check_data_avail(struct sock *ssk)
 
 fallback:
 	/* RFC 8684 section 3.7. */
+	if (subflow->send_mp_fail) {
+		if (mptcp_has_another_subflow_established(ssk)) {
+			mptcp_subflow_reset(ssk);
+			while ((skb = skb_peek(&ssk->sk_receive_queue)))
+				sk_eat_skb(ssk, skb);
+			WRITE_ONCE(subflow->data_avail, 0);
+			return true;
+		}
+
+		if (!msk->pm.subflows) {
+			subflow->mp_fail_need_echo = 1;
+			WRITE_ONCE(subflow->data_avail, 0);
+			return true;
+		}
+	}
+
 	if (subflow->mp_join || subflow->fully_established) {
 		/* fatal protocol error, close the socket.
 		 * subflow_error_report() will introduce the appropriate barriers
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [MPTCP][PATCH v3 mptcp-next 4/8] mptcp: add the mibs for MP_FAIL
  2021-06-28  4:28     ` [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
@ 2021-06-28  4:29       ` Geliang Tang
  2021-06-28  4:29         ` [MPTCP][PATCH v3 mptcp-next 5/8] selftests: mptcp: add MP_FAIL mibs check Geliang Tang
  2021-07-07 23:20       ` [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail Mat Martineau
  1 sibling, 1 reply; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:29 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added the mibs for MP_FAIL: MPTCP_MIB_MPFAILTX and
MPTCP_MIB_MPFAILRX.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/mib.c     | 2 ++
 net/mptcp/mib.h     | 2 ++
 net/mptcp/options.c | 1 +
 net/mptcp/pm.c      | 2 ++
 net/mptcp/subflow.c | 1 +
 5 files changed, 8 insertions(+)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 52ea2517e856..3f0df09138b2 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -44,6 +44,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
 	SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
 	SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
+	SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
+	SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 193466c9b549..2a6cb1d8704c 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -37,6 +37,8 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_RMSUBFLOW,		/* Remove a subflow */
 	MPTCP_MIB_MPPRIOTX,		/* Transmit a MP_PRIO */
 	MPTCP_MIB_MPPRIORX,		/* Received a MP_PRIO */
+	MPTCP_MIB_MPFAILTX,		/* Transmit a MP_FAIL */
+	MPTCP_MIB_MPFAILRX,		/* Received a MP_FAIL */
 	__MPTCP_MIB_MAX
 };
 
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9e6ab6a3d425..9c2e122f1a6e 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1142,6 +1142,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 
 	if (mp_opt.mp_fail) {
 		mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
 		mp_opt.mp_fail = 0;
 	}
 
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index c34c9c0b2fa5..539fc2892191 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -9,6 +9,7 @@
 #include <net/tcp.h>
 #include <net/mptcp.h>
 #include "protocol.h"
+#include "mib.h"
 
 /* path manager command handlers */
 
@@ -258,6 +259,7 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
 		if (!subflow->mp_fail_need_echo) {
 			subflow->send_mp_fail = 1;
 			subflow->fail_seq = fail_seq;
+			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
 		} else {
 			subflow->mp_fail_need_echo = 0;
 		}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 46302208c474..d46c171ef9b0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -915,6 +915,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
 		subflow->send_mp_fail = 1;
 		subflow->fail_seq = subflow->map_seq;
+		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
 	}
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [MPTCP][PATCH v3 mptcp-next 5/8] selftests: mptcp: add MP_FAIL mibs check
  2021-06-28  4:29       ` [MPTCP][PATCH v3 mptcp-next 4/8] mptcp: add the mibs for MP_FAIL Geliang Tang
@ 2021-06-28  4:29         ` Geliang Tang
  2021-06-28  4:29           ` [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending Geliang Tang
  0 siblings, 1 reply; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:29 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added a function chk_fail_nr to check the mibs for MP_FAIL.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 .../testing/selftests/net/mptcp/mptcp_join.sh | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 9a191c1a5de8..535a2757ec30 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -541,6 +541,43 @@ chk_csum_nr()
 	fi
 }
 
+chk_fail_nr()
+{
+	local mp_fail_nr_tx=$1
+	local mp_fail_nr_rx=$2
+	local count
+	local dump_stats
+
+	printf "%-39s %s" " " "ftx"
+	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$mp_fail_nr_tx" ]; then
+		echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - frx   "
+	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$mp_fail_nr_rx" ]; then
+		echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	if [ "${dump_stats}" = 1 ]; then
+		echo Server ns stats
+		ip netns exec $ns1 nstat -as | grep MPTcp
+		echo Client ns stats
+		ip netns exec $ns2 nstat -as | grep MPTcp
+	fi
+}
+
 chk_join_nr()
 {
 	local msg="$1"
@@ -590,6 +627,7 @@ chk_join_nr()
 	fi
 	if [ $checksum -eq 1 ]; then
 		chk_csum_nr
+		chk_fail_nr 0 0
 	fi
 }
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending
  2021-06-28  4:29         ` [MPTCP][PATCH v3 mptcp-next 5/8] selftests: mptcp: add MP_FAIL mibs check Geliang Tang
@ 2021-06-28  4:29           ` Geliang Tang
  2021-06-28  4:29             ` [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving Geliang Tang
  2021-07-07 23:44             ` [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending Mat Martineau
  0 siblings, 2 replies; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:29 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added the infinite mapping sending logic.

Added a new flag snd_infinite_mapping_enable in mptcp_sock. Set it true
when echoing the MP_FAIL in mptcp_pm_mp_fail_received.

In mptcp_established_options_dss, if this flag is true, call the new
function mptcp_write_infinite_mapping to set the infiniting mapping and
sent it out.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/options.c  | 23 +++++++++++++++++++++--
 net/mptcp/pm.c       |  1 +
 net/mptcp/protocol.c |  1 +
 net/mptcp/protocol.h |  6 ++++++
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9c2e122f1a6e..1fce6fddb6ab 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -572,8 +572,21 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
 	}
 }
 
+static void mptcp_write_infinite_mapping(struct mptcp_subflow_context *subflow,
+					 struct mptcp_ext *ext)
+{
+	pr_debug("fail_seq=%llu", subflow->fail_seq);
+
+	if (ext->use_map) {
+		ext->data_seq = subflow->fail_seq;
+		ext->data_len = 0;
+		WRITE_ONCE(msk->snd_infinite_mapping_enable, false);
+	}
+}
+
 static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 					  bool snd_data_fin_enable,
+					  bool snd_infinite_mapping_enable,
 					  unsigned int *size,
 					  unsigned int remaining,
 					  struct mptcp_out_options *opts)
@@ -589,7 +602,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 	opts->csum_reqd = READ_ONCE(msk->csum_enabled);
 	mpext = skb ? mptcp_get_ext(skb) : NULL;
 
-	if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
+	if (!skb || (mpext && mpext->use_map) ||
+	    snd_data_fin_enable || snd_infinite_mapping_enable) {
 		unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
 
 		if (mpext) {
@@ -603,6 +617,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 		dss_size = map_size;
 		if (skb && snd_data_fin_enable)
 			mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
+		if (skb && snd_infinite_mapping_enable)
+			mptcp_write_infinite_mapping(subflow, &opts->ext_copy);
 		ret = true;
 	}
 
@@ -811,6 +827,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 	unsigned int opt_size = 0;
+	bool snd_infinite_mapping;
 	bool snd_data_fin;
 	bool ret = false;
 
@@ -831,9 +848,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 	}
 
 	snd_data_fin = mptcp_data_fin_enabled(msk);
+	snd_infinite_mapping = mptcp_infinite_mapping_enabled(msk);
 	if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
 		ret = true;
-	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
+	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, snd_infinite_mapping,
+					       &opt_size, remaining, opts)) {
 		ret = true;
 		if (opts->ext_copy.use_ack) {
 			if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 539fc2892191..1b3b815f1eca 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -257,6 +257,7 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
 
 	if (!msk->pm.subflows) {
 		if (!subflow->mp_fail_need_echo) {
+			WRITE_ONCE(msk->snd_infinite_mapping_enable, true);
 			subflow->send_mp_fail = 1;
 			subflow->fail_seq = fail_seq;
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 199a36fe4f69..f1d057ee5887 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2727,6 +2727,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	WRITE_ONCE(msk->fully_established, false);
 	if (mp_opt->csum_reqd)
 		WRITE_ONCE(msk->csum_enabled, true);
+	WRITE_ONCE(msk->snd_infinite_mapping_enable, false);
 
 	msk->write_seq = subflow_req->idsn + 1;
 	msk->snd_nxt = msk->write_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7a49454c77a6..fff2f5021619 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -244,6 +244,7 @@ struct mptcp_sock {
 	bool		fully_established;
 	bool		rcv_data_fin;
 	bool		snd_data_fin_enable;
+	bool		snd_infinite_mapping_enable;
 	bool		rcv_fastclose;
 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
 	bool		csum_enabled;
@@ -656,6 +657,11 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
 }
 
+static inline bool mptcp_infinite_mapping_enabled(const struct mptcp_sock *msk)
+{
+	return READ_ONCE(msk->snd_infinite_mapping_enable);
+}
+
 static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
 {
 	if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving
  2021-06-28  4:29           ` [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending Geliang Tang
@ 2021-06-28  4:29             ` Geliang Tang
  2021-06-28  4:29               ` [MPTCP][PATCH v3 mptcp-next 8/8] mptcp: add a mib for the infinite mapping sending Geliang Tang
  2021-07-07 23:49               ` [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving Mat Martineau
  2021-07-07 23:44             ` [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending Mat Martineau
  1 sibling, 2 replies; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:29 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added the infinite mapping receiving logic.

Added a new struct member infinite_mapping_received in struct
mptcp_subflow_context, set it true when the infinite mapping is
received.

In subflow_check_data_avail, if this flag is set, fallback to a regular
TCP.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/protocol.h | 1 +
 net/mptcp/subflow.c  | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index fff2f5021619..4ee97e9ceece 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -424,6 +424,7 @@ struct mptcp_subflow_context {
 		send_mp_prio : 1,
 		send_mp_fail : 1,
 		mp_fail_need_echo : 1,
+		infinite_mapping_received : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
 		disposable : 1;	    /* ctx can be free at ulp release time */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d46c171ef9b0..987fb6b55145 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -965,7 +965,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 
 	data_len = mpext->data_len;
 	if (data_len == 0) {
+		pr_debug("infinite mapping received, data_seq=%llu", mpext->data_seq);
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
+		subflow->infinite_mapping_received = 1;
 		return MAPPING_INVALID;
 	}
 
@@ -1179,6 +1181,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
 		}
 	}
 
+	if (subflow->infinite_mapping_received) {
+		pr_fallback(msk);
+		__mptcp_do_fallback(msk);
+		subflow->infinite_mapping_received = 0;
+		return true;
+	}
+
 	if (subflow->mp_join || subflow->fully_established) {
 		/* fatal protocol error, close the socket.
 		 * subflow_error_report() will introduce the appropriate barriers
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [MPTCP][PATCH v3 mptcp-next 8/8] mptcp: add a mib for the infinite mapping sending
  2021-06-28  4:29             ` [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving Geliang Tang
@ 2021-06-28  4:29               ` Geliang Tang
  2021-07-07 23:49               ` [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving Mat Martineau
  1 sibling, 0 replies; 21+ messages in thread
From: Geliang Tang @ 2021-06-28  4:29 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch added a new mib named MPTCP_MIB_INFINITEMAPTX, increase it
when a infinite mapping has been sent out.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
---
 net/mptcp/mib.c     | 1 +
 net/mptcp/mib.h     | 1 +
 net/mptcp/options.c | 4 ++++
 3 files changed, 6 insertions(+)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 3f0df09138b2..1ac11966bdcd 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -24,6 +24,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
 	SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
 	SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
+	SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
 	SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
 	SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
 	SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 2a6cb1d8704c..fe0d1b2e9455 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -17,6 +17,7 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_JOINACKRX,		/* Received an ACK + MP_JOIN */
 	MPTCP_MIB_JOINACKMAC,		/* HMAC was wrong on ACK + MP_JOIN */
 	MPTCP_MIB_DSSNOMATCH,		/* Received a new mapping that did not match the previous one */
+	MPTCP_MIB_INFINITEMAPTX,	/* Sent an infinite mapping */
 	MPTCP_MIB_INFINITEMAPRX,	/* Received an infinite mapping */
 	MPTCP_MIB_DSSTCPMISMATCH,	/* DSS-mapping did not map with TCP's sequence numbers */
 	MPTCP_MIB_DATACSUMERR,		/* The data checksum fail */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 1fce6fddb6ab..abd0d4660a67 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -575,12 +575,16 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
 static void mptcp_write_infinite_mapping(struct mptcp_subflow_context *subflow,
 					 struct mptcp_ext *ext)
 {
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+	struct sock *sk = (struct sock *)msk;
+
 	pr_debug("fail_seq=%llu", subflow->fail_seq);
 
 	if (ext->use_map) {
 		ext->data_seq = subflow->fail_seq;
 		ext->data_len = 0;
 		WRITE_ONCE(msk->snd_infinite_mapping_enable, false);
+		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_INFINITEMAPTX);
 	}
 }
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending
  2021-06-28  4:28 ` [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending Geliang Tang
  2021-06-28  4:28   ` [MPTCP][PATCH v3 mptcp-next 2/8] mptcp: MP_FAIL suboption receiving Geliang Tang
@ 2021-07-07 23:07   ` Mat Martineau
  2021-07-14  8:45   ` Paolo Abeni
  2 siblings, 0 replies; 21+ messages in thread
From: Mat Martineau @ 2021-07-07 23:07 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Mon, 28 Jun 2021, Geliang Tang wrote:

> This patch added the MP_FAIL suboption sending support.
>
> Add a new flag named send_mp_fail in struct mptcp_subflow_context. If
> this flag is set, send out MP_FAIL suboption.
>
> Add a new member fail_seq in struct mptcp_out_options to save the data
> sequence number to put into the MP_FAIL suboption.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---
> include/net/mptcp.h  |  1 +
> net/mptcp/options.c  | 54 ++++++++++++++++++++++++++++++++++++++++++--
> net/mptcp/protocol.h |  4 ++++
> 3 files changed, 57 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index cb580b06152f..f48d3b5a3fd4 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -72,6 +72,7 @@ struct mptcp_out_options {
> 	u32 nonce;
> 	u64 thmac;
> 	u32 token;
> +	u64 fail_seq;
> 	u8 hmac[20];
> 	struct mptcp_ext ext_copy;
> #endif
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index b5850afea343..b78defe1aed9 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -771,6 +771,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
> 	opts->reset_reason = subflow->reset_reason;
> }
>
> +static bool mptcp_established_options_mp_fail(struct sock *sk,
> +					      unsigned int *size,
> +					      unsigned int remaining,
> +					      struct mptcp_out_options *opts)
> +{
> +	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> +
> +	if (!subflow->send_mp_fail)
> +		return false;
> +
> +	if (remaining < TCPOLEN_MPTCP_FAIL)
> +		return false;
> +
> +	*size = TCPOLEN_MPTCP_FAIL;
> +	opts->suboptions |= OPTION_MPTCP_FAIL;
> +	opts->fail_seq = subflow->fail_seq;
> +
> +	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
> +
> +	return true;
> +}
> +
> bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> 			       unsigned int *size, unsigned int remaining,
> 			       struct mptcp_out_options *opts)
> @@ -787,15 +809,29 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> 		return false;
>
> 	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
> -		mptcp_established_options_rst(sk, skb, size, remaining, opts);
> +		if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> +			*size += opt_size;
> +			remaining -= opt_size;
> +		}
> +		mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
> +		*size += opt_size;
> +		remaining -= opt_size;

If there isn't enough space for MP_TCPRST, opt_size can still have the 
value set by mptcp_established_options_mp_fail() and the previous two 
lines would add or subtract incorrectly. mptcp_established_options_rst() 
could return bool to make the size adjustments conditional like the new 
mp_fail lines above.

- Mat


> 		return true;
> 	}
>
> 	snd_data_fin = mptcp_data_fin_enabled(msk);
> 	if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
> 		ret = true;
> -	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
> +	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
> 		ret = true;
> +		if (opts->ext_copy.use_ack) {
> +			if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> +				*size += opt_size;
> +				remaining -= opt_size;
> +				ret = true;
> +			}
> +		}
> +	}
>
> 	/* we reserved enough space for the above options, and exceeding the
> 	 * TCP option space would be fatal
> @@ -1334,6 +1370,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
> 				      opts->backup, TCPOPT_NOP);
> 	}
>
> +	if (OPTION_MPTCP_FAIL & opts->suboptions) {
> +		const struct sock *ssk = (const struct sock *)tp;
> +		struct mptcp_subflow_context *subflow;
> +
> +		subflow = mptcp_subflow_ctx(ssk);
> +		subflow->send_mp_fail = 0;
> +
> +		*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
> +				      TCPOLEN_MPTCP_FAIL,
> +				      0, 0);
> +		put_unaligned_be64(opts->fail_seq, ptr);
> +		ptr += 2;
> +	}
> +
> 	if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
> 		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
> 				      TCPOLEN_MPTCP_MPJ_SYN,
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 426ed80fe72f..007af5e4ba3d 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -26,6 +26,7 @@
> #define OPTION_MPTCP_FASTCLOSE	BIT(8)
> #define OPTION_MPTCP_PRIO	BIT(9)
> #define OPTION_MPTCP_RST	BIT(10)
> +#define OPTION_MPTCP_FAIL	BIT(11)
>
> /* MPTCP option subtypes */
> #define MPTCPOPT_MP_CAPABLE	0
> @@ -67,6 +68,7 @@
> #define TCPOLEN_MPTCP_PRIO_ALIGN	4
> #define TCPOLEN_MPTCP_FASTCLOSE		12
> #define TCPOLEN_MPTCP_RST		4
> +#define TCPOLEN_MPTCP_FAIL		12
>
> #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
>
> @@ -417,6 +419,7 @@ struct mptcp_subflow_context {
> 		mpc_map : 1,
> 		backup : 1,
> 		send_mp_prio : 1,
> +		send_mp_fail : 1,
> 		rx_eof : 1,
> 		can_ack : 1,        /* only after processing the remote a key */
> 		disposable : 1;	    /* ctx can be free at ulp release time */
> @@ -431,6 +434,7 @@ struct mptcp_subflow_context {
> 	u8	reset_seen:1;
> 	u8	reset_transient:1;
> 	u8	reset_reason:4;
> +	u64	fail_seq;
>
> 	long	delegated_status;
> 	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */
> -- 
> 2.31.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail
  2021-06-28  4:28     ` [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
  2021-06-28  4:29       ` [MPTCP][PATCH v3 mptcp-next 4/8] mptcp: add the mibs for MP_FAIL Geliang Tang
@ 2021-07-07 23:20       ` Mat Martineau
  2021-07-13 12:44         ` Geliang Tang
  1 sibling, 1 reply; 21+ messages in thread
From: Mat Martineau @ 2021-07-07 23:20 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Mon, 28 Jun 2021, Geliang Tang wrote:

> When a bad checksum is detected, send out the MP_FAIL option.
>
> When multiple subflows are in use, close the affected subflow with a RST
> that includes an MP_FAIL option.
>
> When a single subfow is in use, send back an MP_FAIL option on the
> subflow-level ACK. And the receiver of this MP_FAIL respond with an
> MP_FAIL in the reverse direction.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---
> net/mptcp/pm.c       | 10 ++++++++++
> net/mptcp/protocol.h | 14 ++++++++++++++
> net/mptcp/subflow.c  | 18 ++++++++++++++++++
> 3 files changed, 42 insertions(+)
>
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index d4c19420194a..c34c9c0b2fa5 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -250,8 +250,18 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
> void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
> {
> 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> +	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
>
> 	pr_debug("map_seq=%llu fail_seq=%llu", subflow->map_seq, fail_seq);
> +
> +	if (!msk->pm.subflows) {

The pm.lock isn't held so it's not safe to access pm.subflows

I don't think it's sufficient to read pm.subflows with the lock or add 
READ_ONCE/WRITE_ONCE, since that would still allow race conditions with 
the msk. To handle fallback when receiving MP_FAIL I think the 
sock_owned_by_user() checks and delegated callback (similar to 
mptcp_subflow_process_delegated()) may be needed.

> +		if (!subflow->mp_fail_need_echo) {
> +			subflow->send_mp_fail = 1;
> +			subflow->fail_seq = fail_seq;

Echoing the fail_seq back doesn't seem correct, from the RFC it seems like 
this side should send a sequence number for the opposite data direction? 
Do you agree?

> +		} else {
> +			subflow->mp_fail_need_echo = 0;
> +		}
> +	}
> }
>
> /* path manager helpers */
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 8e050575a2d9..7a49454c77a6 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -422,6 +422,7 @@ struct mptcp_subflow_context {
> 		backup : 1,
> 		send_mp_prio : 1,
> 		send_mp_fail : 1,
> +		mp_fail_need_echo : 1,

I think mp_fail_expect_echo would be a more accurate name.

> 		rx_eof : 1,
> 		can_ack : 1,        /* only after processing the remote a key */
> 		disposable : 1;	    /* ctx can be free at ulp release time */
> @@ -594,6 +595,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
> 	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
> }
>
> +static inline bool mptcp_has_another_subflow_established(struct sock *ssk)
> +{
> +	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
> +	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> +
> +	mptcp_for_each_subflow(msk, tmp) {
> +		if (tmp->fully_established && tmp != subflow)

Why check tmp->fully_established here?


- Mat


> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> void __init mptcp_proto_init(void);
> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> int __init mptcp_proto_v6_init(void);
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 0b5d4a3eadcd..46302208c474 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -913,6 +913,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
> 	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
> 	if (unlikely(csum_fold(csum))) {
> 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
> +		subflow->send_mp_fail = 1;
> +		subflow->fail_seq = subflow->map_seq;
> 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
> 	}
>
> @@ -1160,6 +1162,22 @@ static bool subflow_check_data_avail(struct sock *ssk)
>
> fallback:
> 	/* RFC 8684 section 3.7. */
> +	if (subflow->send_mp_fail) {
> +		if (mptcp_has_another_subflow_established(ssk)) {
> +			mptcp_subflow_reset(ssk);
> +			while ((skb = skb_peek(&ssk->sk_receive_queue)))
> +				sk_eat_skb(ssk, skb);
> +			WRITE_ONCE(subflow->data_avail, 0);
> +			return true;
> +		}
> +
> +		if (!msk->pm.subflows) {
> +			subflow->mp_fail_need_echo = 1;
> +			WRITE_ONCE(subflow->data_avail, 0);
> +			return true;
> +		}
> +	}
> +
> 	if (subflow->mp_join || subflow->fully_established) {
> 		/* fatal protocol error, close the socket.
> 		 * subflow_error_report() will introduce the appropriate barriers
> -- 
> 2.31.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending
  2021-06-28  4:29           ` [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending Geliang Tang
  2021-06-28  4:29             ` [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving Geliang Tang
@ 2021-07-07 23:44             ` Mat Martineau
  2021-07-08  0:44               ` Mat Martineau
  1 sibling, 1 reply; 21+ messages in thread
From: Mat Martineau @ 2021-07-07 23:44 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Mon, 28 Jun 2021, Geliang Tang wrote:

> This patch added the infinite mapping sending logic.
>
> Added a new flag snd_infinite_mapping_enable in mptcp_sock. Set it true
> when echoing the MP_FAIL in mptcp_pm_mp_fail_received.
>
> In mptcp_established_options_dss, if this flag is true, call the new
> function mptcp_write_infinite_mapping to set the infiniting mapping and
> sent it out.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---

Hi Geliang -

Thanks for working on the infinite mapping support to include in this 
patch set.

> net/mptcp/options.c  | 23 +++++++++++++++++++++--
> net/mptcp/pm.c       |  1 +
> net/mptcp/protocol.c |  1 +
> net/mptcp/protocol.h |  6 ++++++
> 4 files changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index 9c2e122f1a6e..1fce6fddb6ab 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -572,8 +572,21 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
> 	}
> }
>
> +static void mptcp_write_infinite_mapping(struct mptcp_subflow_context *subflow,
> +					 struct mptcp_ext *ext)
> +{
> +	pr_debug("fail_seq=%llu", subflow->fail_seq);
> +
> +	if (ext->use_map) {
> +		ext->data_seq = subflow->fail_seq;
> +		ext->data_len = 0;
> +		WRITE_ONCE(msk->snd_infinite_mapping_enable, false);

I'm not sure it's enough to override the data_seq / data_len on an 
existing skb like this, more careful coordination with fallback might be 
needed. What do you think about handling the infinite mapping where 
data_seq & data_len get populated in mptcp_sendmsg_frag()?

The RFC also says the infinite mapping should only be sent once, I wonder 
if we can guarantee that with GSO?

> +	}
> +}
> +
> static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
> 					  bool snd_data_fin_enable,
> +					  bool snd_infinite_mapping_enable,

This new parameter isn't necessary, it can be read from the msk.

> 					  unsigned int *size,
> 					  unsigned int remaining,
> 					  struct mptcp_out_options *opts)
> @@ -589,7 +602,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
> 	opts->csum_reqd = READ_ONCE(msk->csum_enabled);
> 	mpext = skb ? mptcp_get_ext(skb) : NULL;
>
> -	if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
> +	if (!skb || (mpext && mpext->use_map) ||
> +	    snd_data_fin_enable || snd_infinite_mapping_enable) {
> 		unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
>
> 		if (mpext) {
> @@ -603,6 +617,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
> 		dss_size = map_size;
> 		if (skb && snd_data_fin_enable)
> 			mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
> +		if (skb && snd_infinite_mapping_enable)
> +			mptcp_write_infinite_mapping(subflow, &opts->ext_copy);
> 		ret = true;
> 	}
>
> @@ -811,6 +827,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> 	unsigned int opt_size = 0;
> +	bool snd_infinite_mapping;
> 	bool snd_data_fin;
> 	bool ret = false;
>
> @@ -831,9 +848,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> 	}
>
> 	snd_data_fin = mptcp_data_fin_enabled(msk);
> +	snd_infinite_mapping = mptcp_infinite_mapping_enabled(msk);
> 	if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
> 		ret = true;
> -	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
> +	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, snd_infinite_mapping,
> +					       &opt_size, remaining, opts)) {
> 		ret = true;
> 		if (opts->ext_copy.use_ack) {
> 			if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index 539fc2892191..1b3b815f1eca 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -257,6 +257,7 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
>
> 	if (!msk->pm.subflows) {
> 		if (!subflow->mp_fail_need_echo) {
> +			WRITE_ONCE(msk->snd_infinite_mapping_enable, true);
> 			subflow->send_mp_fail = 1;
> 			subflow->fail_seq = fail_seq;
> 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 199a36fe4f69..f1d057ee5887 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -2727,6 +2727,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
> 	WRITE_ONCE(msk->fully_established, false);
> 	if (mp_opt->csum_reqd)
> 		WRITE_ONCE(msk->csum_enabled, true);
> +	WRITE_ONCE(msk->snd_infinite_mapping_enable, false);
>
> 	msk->write_seq = subflow_req->idsn + 1;
> 	msk->snd_nxt = msk->write_seq;
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 7a49454c77a6..fff2f5021619 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -244,6 +244,7 @@ struct mptcp_sock {
> 	bool		fully_established;
> 	bool		rcv_data_fin;
> 	bool		snd_data_fin_enable;
> +	bool		snd_infinite_mapping_enable;
> 	bool		rcv_fastclose;
> 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
> 	bool		csum_enabled;
> @@ -656,6 +657,11 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
> 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
> }
>
> +static inline bool mptcp_infinite_mapping_enabled(const struct mptcp_sock *msk)

I don't think this helper is needed.

> +{
> +	return READ_ONCE(msk->snd_infinite_mapping_enable);
> +}
> +
> static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
> {
> 	if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
> -- 
> 2.31.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving
  2021-06-28  4:29             ` [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving Geliang Tang
  2021-06-28  4:29               ` [MPTCP][PATCH v3 mptcp-next 8/8] mptcp: add a mib for the infinite mapping sending Geliang Tang
@ 2021-07-07 23:49               ` Mat Martineau
  1 sibling, 0 replies; 21+ messages in thread
From: Mat Martineau @ 2021-07-07 23:49 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Mon, 28 Jun 2021, Geliang Tang wrote:

> This patch added the infinite mapping receiving logic.
>
> Added a new struct member infinite_mapping_received in struct
> mptcp_subflow_context, set it true when the infinite mapping is
> received.
>
> In subflow_check_data_avail, if this flag is set, fallback to a regular
> TCP.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---
> net/mptcp/protocol.h | 1 +
> net/mptcp/subflow.c  | 9 +++++++++
> 2 files changed, 10 insertions(+)
>
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index fff2f5021619..4ee97e9ceece 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -424,6 +424,7 @@ struct mptcp_subflow_context {
> 		send_mp_prio : 1,
> 		send_mp_fail : 1,
> 		mp_fail_need_echo : 1,
> +		infinite_mapping_received : 1,
> 		rx_eof : 1,
> 		can_ack : 1,        /* only after processing the remote a key */
> 		disposable : 1;	    /* ctx can be free at ulp release time */
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index d46c171ef9b0..987fb6b55145 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -965,7 +965,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
>
> 	data_len = mpext->data_len;
> 	if (data_len == 0) {
> +		pr_debug("infinite mapping received, data_seq=%llu", mpext->data_seq);
> 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
> +		subflow->infinite_mapping_received = 1;
> 		return MAPPING_INVALID;
> 	}
>
> @@ -1179,6 +1181,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
> 		}
> 	}
>
> +	if (subflow->infinite_mapping_received) {
> +		pr_fallback(msk);
> +		__mptcp_do_fallback(msk);
> +		subflow->infinite_mapping_received = 0;
> +		return true;
> +	}
> +

I think there's more checking to do before falling back with the infinite 
mapping. The RFC says:

"""
    This infinite mapping will be a DSS option (Section 3.3) on the first
    new packet, containing a Data Sequence Mapping that acts retroactively,
    referring to the start of the subflow sequence number of the most
    recent segment that was known to be delivered intact (i.e., was
    successfully DATA_ACKed).  From that point onward, data can be altered
    by a middlebox without affecting MPTCP, as the data stream is
    equivalent to a regular, legacy TCP session.
"""


> 	if (subflow->mp_join || subflow->fully_established) {
> 		/* fatal protocol error, close the socket.
> 		 * subflow_error_report() will introduce the appropriate barriers
> -- 
> 2.31.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending
  2021-07-07 23:44             ` [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending Mat Martineau
@ 2021-07-08  0:44               ` Mat Martineau
  0 siblings, 0 replies; 21+ messages in thread
From: Mat Martineau @ 2021-07-08  0:44 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Wed, 7 Jul 2021, Mat Martineau wrote:

> On Mon, 28 Jun 2021, Geliang Tang wrote:
>
>> This patch added the infinite mapping sending logic.
>> 
>> Added a new flag snd_infinite_mapping_enable in mptcp_sock. Set it true
>> when echoing the MP_FAIL in mptcp_pm_mp_fail_received.
>> 
>> In mptcp_established_options_dss, if this flag is true, call the new
>> function mptcp_write_infinite_mapping to set the infiniting mapping and
>> sent it out.
>> 
>> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
>> ---
>
> Hi Geliang -
>
> Thanks for working on the infinite mapping support to include in this patch 
> set.
>
>> net/mptcp/options.c  | 23 +++++++++++++++++++++--
>> net/mptcp/pm.c       |  1 +
>> net/mptcp/protocol.c |  1 +
>> net/mptcp/protocol.h |  6 ++++++
>> 4 files changed, 29 insertions(+), 2 deletions(-)
>> 
>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
>> index 9c2e122f1a6e..1fce6fddb6ab 100644
>> --- a/net/mptcp/options.c
>> +++ b/net/mptcp/options.c
>> @@ -572,8 +572,21 @@ static void mptcp_write_data_fin(struct 
>> mptcp_subflow_context *subflow,
>> 	}
>> }
>> 
>> +static void mptcp_write_infinite_mapping(struct mptcp_subflow_context 
>> *subflow,
>> +					 struct mptcp_ext *ext)
>> +{
>> +	pr_debug("fail_seq=%llu", subflow->fail_seq);
>> +
>> +	if (ext->use_map) {
>> +		ext->data_seq = subflow->fail_seq;
>> +		ext->data_len = 0;
>> +		WRITE_ONCE(msk->snd_infinite_mapping_enable, false);
>
> I'm not sure it's enough to override the data_seq / data_len on an existing 
> skb like this, more careful coordination with fallback might be needed. What 
> do you think about handling the infinite mapping where data_seq & data_len 
> get populated in mptcp_sendmsg_frag()?
>
> The RFC also says the infinite mapping should only be sent once, I wonder if 
> we can guarantee that with GSO?
>

One more thing: if the checksum is enabled when an infinite mapping is 
set, it must also be set to 0. The rx side should check this too.

- Mat

>> +	}
>> +}
>> +
>> static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff 
>> *skb,
>> 					  bool snd_data_fin_enable,
>> +					  bool snd_infinite_mapping_enable,
>
> This new parameter isn't necessary, it can be read from the msk.
>
>> 					  unsigned int *size,
>> 					  unsigned int remaining,
>> 					  struct mptcp_out_options *opts)
>> @@ -589,7 +602,8 @@ static bool mptcp_established_options_dss(struct sock 
>> *sk, struct sk_buff *skb,
>> 	opts->csum_reqd = READ_ONCE(msk->csum_enabled);
>> 	mpext = skb ? mptcp_get_ext(skb) : NULL;
>> 
>> -	if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
>> +	if (!skb || (mpext && mpext->use_map) ||
>> +	    snd_data_fin_enable || snd_infinite_mapping_enable) {
>> 		unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + 
>> TCPOLEN_MPTCP_DSS_MAP64;
>>
>> 		if (mpext) {
>> @@ -603,6 +617,8 @@ static bool mptcp_established_options_dss(struct sock 
>> *sk, struct sk_buff *skb,
>> 		dss_size = map_size;
>> 		if (skb && snd_data_fin_enable)
>> 			mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
>> +		if (skb && snd_infinite_mapping_enable)
>> +			mptcp_write_infinite_mapping(subflow, 
>> &opts->ext_copy);
>> 		ret = true;
>> 	}
>> 
>> @@ -811,6 +827,7 @@ bool mptcp_established_options(struct sock *sk, struct 
>> sk_buff *skb,
>> 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
>> 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
>> 	unsigned int opt_size = 0;
>> +	bool snd_infinite_mapping;
>> 	bool snd_data_fin;
>> 	bool ret = false;
>> 
>> @@ -831,9 +848,11 @@ bool mptcp_established_options(struct sock *sk, struct 
>> sk_buff *skb,
>> 	}
>>
>> 	snd_data_fin = mptcp_data_fin_enabled(msk);
>> +	snd_infinite_mapping = mptcp_infinite_mapping_enabled(msk);
>> 	if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, 
>> remaining, opts))
>> 		ret = true;
>> -	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, 
>> &opt_size, remaining, opts)) {
>> +	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, 
>> snd_infinite_mapping,
>> +					       &opt_size, remaining, opts)) {
>> 		ret = true;
>> 		if (opts->ext_copy.use_ack) {
>> 			if (mptcp_established_options_mp_fail(sk, &opt_size, 
>> remaining, opts)) {
>> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
>> index 539fc2892191..1b3b815f1eca 100644
>> --- a/net/mptcp/pm.c
>> +++ b/net/mptcp/pm.c
>> @@ -257,6 +257,7 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 
>> fail_seq)
>>
>> 	if (!msk->pm.subflows) {
>> 		if (!subflow->mp_fail_need_echo) {
>> +			WRITE_ONCE(msk->snd_infinite_mapping_enable, true);
>> 			subflow->send_mp_fail = 1;
>> 			subflow->fail_seq = fail_seq;
>> 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index 199a36fe4f69..f1d057ee5887 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -2727,6 +2727,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
>> 	WRITE_ONCE(msk->fully_established, false);
>> 	if (mp_opt->csum_reqd)
>> 		WRITE_ONCE(msk->csum_enabled, true);
>> +	WRITE_ONCE(msk->snd_infinite_mapping_enable, false);
>>
>> 	msk->write_seq = subflow_req->idsn + 1;
>> 	msk->snd_nxt = msk->write_seq;
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index 7a49454c77a6..fff2f5021619 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -244,6 +244,7 @@ struct mptcp_sock {
>> 	bool		fully_established;
>> 	bool		rcv_data_fin;
>> 	bool		snd_data_fin_enable;
>> +	bool		snd_infinite_mapping_enable;
>> 	bool		rcv_fastclose;
>> 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN 
>> */
>> 	bool		csum_enabled;
>> @@ -656,6 +657,11 @@ static inline bool mptcp_data_fin_enabled(const struct 
>> mptcp_sock *msk)
>> 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
>> }
>> 
>> +static inline bool mptcp_infinite_mapping_enabled(const struct mptcp_sock 
>> *msk)
>
> I don't think this helper is needed.
>
>> +{
>> +	return READ_ONCE(msk->snd_infinite_mapping_enable);
>> +}
>> +
>> static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock 
>> *ssk)
>> {
>> 	if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= 
>> READ_ONCE(sk->sk_sndbuf))
>> -- 
>> 2.31.1
>> 
>> 
>> 
>
> --
> Mat Martineau
> Intel
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail
  2021-07-07 23:20       ` [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail Mat Martineau
@ 2021-07-13 12:44         ` Geliang Tang
  2021-07-13 20:35           ` Mat Martineau
  0 siblings, 1 reply; 21+ messages in thread
From: Geliang Tang @ 2021-07-13 12:44 UTC (permalink / raw)
  To: Mat Martineau; +Cc: mptcp

Hi Mat,

Sorry for late response on this.

Mat Martineau <mathew.j.martineau@linux.intel.com> 于2021年7月8日周四 上午7:20写道：
>
> On Mon, 28 Jun 2021, Geliang Tang wrote:
>
> > When a bad checksum is detected, send out the MP_FAIL option.
> >
> > When multiple subflows are in use, close the affected subflow with a RST
> > that includes an MP_FAIL option.
> >
> > When a single subfow is in use, send back an MP_FAIL option on the
> > subflow-level ACK. And the receiver of this MP_FAIL respond with an
> > MP_FAIL in the reverse direction.
> >
> > Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> > ---
> > net/mptcp/pm.c       | 10 ++++++++++
> > net/mptcp/protocol.h | 14 ++++++++++++++
> > net/mptcp/subflow.c  | 18 ++++++++++++++++++
> > 3 files changed, 42 insertions(+)
> >
> > diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> > index d4c19420194a..c34c9c0b2fa5 100644
> > --- a/net/mptcp/pm.c
> > +++ b/net/mptcp/pm.c
> > @@ -250,8 +250,18 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
> > void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
> > {
> >       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> > +     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> >
> >       pr_debug("map_seq=%llu fail_seq=%llu", subflow->map_seq, fail_seq);
> > +
> > +     if (!msk->pm.subflows) {
>
> The pm.lock isn't held so it's not safe to access pm.subflows
>
> I don't think it's sufficient to read pm.subflows with the lock or add
> READ_ONCE/WRITE_ONCE, since that would still allow race conditions with
> the msk. To handle fallback when receiving MP_FAIL I think the
> sock_owned_by_user() checks and delegated callback (similar to
> mptcp_subflow_process_delegated()) may be needed.
>

I think it's better to use the below mptcp_has_another_subflow() function
here instead of using msk->pm.subflows to check the single subflow case.

> > +             if (!subflow->mp_fail_need_echo) {
> > +                     subflow->send_mp_fail = 1;
> > +                     subflow->fail_seq = fail_seq;
>
> Echoing the fail_seq back doesn't seem correct, from the RFC it seems like
> this side should send a sequence number for the opposite data direction?
> Do you agree?

So we should use 'subflow->fail_seq = subflow->map_seq;' here?

>
> > +             } else {
> > +                     subflow->mp_fail_need_echo = 0;
> > +             }
> > +     }
> > }
> >
> > /* path manager helpers */
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index 8e050575a2d9..7a49454c77a6 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -422,6 +422,7 @@ struct mptcp_subflow_context {
> >               backup : 1,
> >               send_mp_prio : 1,
> >               send_mp_fail : 1,
> > +             mp_fail_need_echo : 1,
>
> I think mp_fail_expect_echo would be a more accurate name.

Updated in v4.

>
> >               rx_eof : 1,
> >               can_ack : 1,        /* only after processing the remote a key */
> >               disposable : 1;     /* ctx can be free at ulp release time */
> > @@ -594,6 +595,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
> >       inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
> > }
> >
> > +static inline bool mptcp_has_another_subflow_established(struct sock *ssk)
> > +{
> > +     struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
> > +     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> > +
> > +     mptcp_for_each_subflow(msk, tmp) {
> > +             if (tmp->fully_established && tmp != subflow)
>
> Why check tmp->fully_established here?

I'll drop tmp->fully_established, and rename this function
mptcp_has_another_subflow to check whether there's a single subflow.


In my test, this MP_FAIL patchset only works in the multiple subflows case.
The receiver detected the bad checksum, sent RST + MP_FAIL to close this
subflow and discard the data with the bad checksum. Then the sender will
retransmit a new data with good checksum from other subflow. And the
receiver can resemble the whole data successfully.

But the single subflow case dosen't work. I don't know how the new data
can be retransmitted from the sender side when it fallback to regular TCP.
So I'm not sure how to implement the single subflow case.

Could you please give me some suggestions?

Thanks,
-Geliang


>
>
> - Mat
>
>
> > +                     return true;
> > +     }
> > +
> > +     return false;
> > +}
> > +
> > void __init mptcp_proto_init(void);
> > #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> > int __init mptcp_proto_v6_init(void);
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index 0b5d4a3eadcd..46302208c474 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -913,6 +913,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
> >       csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
> >       if (unlikely(csum_fold(csum))) {
> >               MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
> > +             subflow->send_mp_fail = 1;
> > +             subflow->fail_seq = subflow->map_seq;
> >               return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
> >       }
> >
> > @@ -1160,6 +1162,22 @@ static bool subflow_check_data_avail(struct sock *ssk)
> >
> > fallback:
> >       /* RFC 8684 section 3.7. */
> > +     if (subflow->send_mp_fail) {
> > +             if (mptcp_has_another_subflow_established(ssk)) {
> > +                     mptcp_subflow_reset(ssk);
> > +                     while ((skb = skb_peek(&ssk->sk_receive_queue)))
> > +                             sk_eat_skb(ssk, skb);
> > +                     WRITE_ONCE(subflow->data_avail, 0);
> > +                     return true;
> > +             }
> > +
> > +             if (!msk->pm.subflows) {
> > +                     subflow->mp_fail_need_echo = 1;
> > +                     WRITE_ONCE(subflow->data_avail, 0);
> > +                     return true;
> > +             }
> > +     }
> > +
> >       if (subflow->mp_join || subflow->fully_established) {
> >               /* fatal protocol error, close the socket.
> >                * subflow_error_report() will introduce the appropriate barriers
> > --
> > 2.31.1
> >
> >
> >
>
> --
> Mat Martineau
> Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail
  2021-07-13 12:44         ` Geliang Tang
@ 2021-07-13 20:35           ` Mat Martineau
  2021-07-14  3:56             ` Geliang Tang
  0 siblings, 1 reply; 21+ messages in thread
From: Mat Martineau @ 2021-07-13 20:35 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

[-- Attachment #1: Type: text/plain, Size: 7251 bytes --]

On Tue, 13 Jul 2021, Geliang Tang wrote:

> Hi Mat,
>
> Sorry for late response on this.
>
> Mat Martineau <mathew.j.martineau@linux.intel.com> 于2021年7月8日周四 上午7:20写道：
>>
>> On Mon, 28 Jun 2021, Geliang Tang wrote:
>>
>>> When a bad checksum is detected, send out the MP_FAIL option.
>>>
>>> When multiple subflows are in use, close the affected subflow with a RST
>>> that includes an MP_FAIL option.
>>>
>>> When a single subfow is in use, send back an MP_FAIL option on the
>>> subflow-level ACK. And the receiver of this MP_FAIL respond with an
>>> MP_FAIL in the reverse direction.
>>>
>>> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
>>> ---
>>> net/mptcp/pm.c       | 10 ++++++++++
>>> net/mptcp/protocol.h | 14 ++++++++++++++
>>> net/mptcp/subflow.c  | 18 ++++++++++++++++++
>>> 3 files changed, 42 insertions(+)
>>>
>>> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
>>> index d4c19420194a..c34c9c0b2fa5 100644
>>> --- a/net/mptcp/pm.c
>>> +++ b/net/mptcp/pm.c
>>> @@ -250,8 +250,18 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
>>> void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
>>> {
>>>       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
>>> +     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
>>>
>>>       pr_debug("map_seq=%llu fail_seq=%llu", subflow->map_seq, fail_seq);
>>> +
>>> +     if (!msk->pm.subflows) {
>>
>> The pm.lock isn't held so it's not safe to access pm.subflows
>>
>> I don't think it's sufficient to read pm.subflows with the lock or add
>> READ_ONCE/WRITE_ONCE, since that would still allow race conditions with
>> the msk. To handle fallback when receiving MP_FAIL I think the
>> sock_owned_by_user() checks and delegated callback (similar to
>> mptcp_subflow_process_delegated()) may be needed.
>>
>
> I think it's better to use the below mptcp_has_another_subflow() function
> here instead of using msk->pm.subflows to check the single subflow case.
>

Yes, that sounds good.

>>> +             if (!subflow->mp_fail_need_echo) {
>>> +                     subflow->send_mp_fail = 1;
>>> +                     subflow->fail_seq = fail_seq;
>>
>> Echoing the fail_seq back doesn't seem correct, from the RFC it seems like
>> this side should send a sequence number for the opposite data direction?
>> Do you agree?
>
> So we should use 'subflow->fail_seq = subflow->map_seq;' here?

I think so - but I'm concerned about the possibility of out-of-order data 
and checking for single- or multiple-subflow cases. I need to look at the 
RFC some more to tell for sure.

>
>>
>>> +             } else {
>>> +                     subflow->mp_fail_need_echo = 0;
>>> +             }
>>> +     }
>>> }
>>>
>>> /* path manager helpers */
>>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>>> index 8e050575a2d9..7a49454c77a6 100644
>>> --- a/net/mptcp/protocol.h
>>> +++ b/net/mptcp/protocol.h
>>> @@ -422,6 +422,7 @@ struct mptcp_subflow_context {
>>>               backup : 1,
>>>               send_mp_prio : 1,
>>>               send_mp_fail : 1,
>>> +             mp_fail_need_echo : 1,
>>
>> I think mp_fail_expect_echo would be a more accurate name.
>
> Updated in v4.
>

Ok.

>>
>>>               rx_eof : 1,
>>>               can_ack : 1,        /* only after processing the remote a key */
>>>               disposable : 1;     /* ctx can be free at ulp release time */
>>> @@ -594,6 +595,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
>>>       inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
>>> }
>>>
>>> +static inline bool mptcp_has_another_subflow_established(struct sock *ssk)
>>> +{
>>> +     struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
>>> +     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
>>> +
>>> +     mptcp_for_each_subflow(msk, tmp) {
>>> +             if (tmp->fully_established && tmp != subflow)
>>
>> Why check tmp->fully_established here?
>
> I'll drop tmp->fully_established, and rename this function
> mptcp_has_another_subflow to check whether there's a single subflow.

Sounds good.

>
>
> In my test, this MP_FAIL patchset only works in the multiple subflows case.
> The receiver detected the bad checksum, sent RST + MP_FAIL to close this
> subflow and discard the data with the bad checksum. Then the sender will
> retransmit a new data with good checksum from other subflow. And the
> receiver can resemble the whole data successfully.
>
> But the single subflow case dosen't work. I don't know how the new data
> can be retransmitted from the sender side when it fallback to regular TCP.
> So I'm not sure how to implement the single subflow case.
>
> Could you please give me some suggestions?

Yes, the multiple subflow case is the simpler one this time. We can throw 
away the affected subflow and recover using the others.

Since the non-contiguous, single subflow case has to close the subflow, 
that's more like the "easier" case.

So, it's the infinite mapping case that's tricky. Have you looked at the 
multipath-tcp.org code? The important part seems to be using the 
information in the infinite mapping to determine what data to keep or 
throw away before beginning "fallback" behavior. I can look at the 
infinite mapping information in the RFC and see how that fits with our 
implementation tomorrow.

- Mat


>>
>>> +                     return true;
>>> +     }
>>> +
>>> +     return false;
>>> +}
>>> +
>>> void __init mptcp_proto_init(void);
>>> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>>> int __init mptcp_proto_v6_init(void);
>>> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
>>> index 0b5d4a3eadcd..46302208c474 100644
>>> --- a/net/mptcp/subflow.c
>>> +++ b/net/mptcp/subflow.c
>>> @@ -913,6 +913,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
>>>       csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
>>>       if (unlikely(csum_fold(csum))) {
>>>               MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
>>> +             subflow->send_mp_fail = 1;
>>> +             subflow->fail_seq = subflow->map_seq;
>>>               return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
>>>       }
>>>
>>> @@ -1160,6 +1162,22 @@ static bool subflow_check_data_avail(struct sock *ssk)
>>>
>>> fallback:
>>>       /* RFC 8684 section 3.7. */
>>> +     if (subflow->send_mp_fail) {
>>> +             if (mptcp_has_another_subflow_established(ssk)) {
>>> +                     mptcp_subflow_reset(ssk);
>>> +                     while ((skb = skb_peek(&ssk->sk_receive_queue)))
>>> +                             sk_eat_skb(ssk, skb);
>>> +                     WRITE_ONCE(subflow->data_avail, 0);
>>> +                     return true;
>>> +             }
>>> +
>>> +             if (!msk->pm.subflows) {
>>> +                     subflow->mp_fail_need_echo = 1;
>>> +                     WRITE_ONCE(subflow->data_avail, 0);
>>> +                     return true;
>>> +             }
>>> +     }
>>> +
>>>       if (subflow->mp_join || subflow->fully_established) {
>>>               /* fatal protocol error, close the socket.
>>>                * subflow_error_report() will introduce the appropriate barriers
>>> --
>>> 2.31.1

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail
  2021-07-13 20:35           ` Mat Martineau
@ 2021-07-14  3:56             ` Geliang Tang
  2021-07-14 17:48               ` Mat Martineau
  0 siblings, 1 reply; 21+ messages in thread
From: Geliang Tang @ 2021-07-14  3:56 UTC (permalink / raw)
  To: Mat Martineau; +Cc: mptcp

Hi Mat,

Mat Martineau <mathew.j.martineau@linux.intel.com> 于2021年7月14日周三 上午4:35写道：
>
> On Tue, 13 Jul 2021, Geliang Tang wrote:
>
> > Hi Mat,
> >
> > Sorry for late response on this.
> >
> > Mat Martineau <mathew.j.martineau@linux.intel.com> 于2021年7月8日周四 上午7:20写道：
> >>
> >> On Mon, 28 Jun 2021, Geliang Tang wrote:
> >>
> >>> When a bad checksum is detected, send out the MP_FAIL option.
> >>>
> >>> When multiple subflows are in use, close the affected subflow with a RST
> >>> that includes an MP_FAIL option.
> >>>
> >>> When a single subfow is in use, send back an MP_FAIL option on the
> >>> subflow-level ACK. And the receiver of this MP_FAIL respond with an
> >>> MP_FAIL in the reverse direction.
> >>>
> >>> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> >>> ---
> >>> net/mptcp/pm.c       | 10 ++++++++++
> >>> net/mptcp/protocol.h | 14 ++++++++++++++
> >>> net/mptcp/subflow.c  | 18 ++++++++++++++++++
> >>> 3 files changed, 42 insertions(+)
> >>>
> >>> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> >>> index d4c19420194a..c34c9c0b2fa5 100644
> >>> --- a/net/mptcp/pm.c
> >>> +++ b/net/mptcp/pm.c
> >>> @@ -250,8 +250,18 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
> >>> void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
> >>> {
> >>>       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> >>> +     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> >>>
> >>>       pr_debug("map_seq=%llu fail_seq=%llu", subflow->map_seq, fail_seq);
> >>> +
> >>> +     if (!msk->pm.subflows) {
> >>
> >> The pm.lock isn't held so it's not safe to access pm.subflows
> >>
> >> I don't think it's sufficient to read pm.subflows with the lock or add
> >> READ_ONCE/WRITE_ONCE, since that would still allow race conditions with
> >> the msk. To handle fallback when receiving MP_FAIL I think the
> >> sock_owned_by_user() checks and delegated callback (similar to
> >> mptcp_subflow_process_delegated()) may be needed.
> >>
> >
> > I think it's better to use the below mptcp_has_another_subflow() function
> > here instead of using msk->pm.subflows to check the single subflow case.
> >
>
> Yes, that sounds good.
>
> >>> +             if (!subflow->mp_fail_need_echo) {
> >>> +                     subflow->send_mp_fail = 1;
> >>> +                     subflow->fail_seq = fail_seq;
> >>
> >> Echoing the fail_seq back doesn't seem correct, from the RFC it seems like
> >> this side should send a sequence number for the opposite data direction?
> >> Do you agree?
> >
> > So we should use 'subflow->fail_seq = subflow->map_seq;' here?
>
> I think so - but I'm concerned about the possibility of out-of-order data
> and checking for single- or multiple-subflow cases. I need to look at the
> RFC some more to tell for sure.
>
> >
> >>
> >>> +             } else {
> >>> +                     subflow->mp_fail_need_echo = 0;
> >>> +             }
> >>> +     }
> >>> }
> >>>
> >>> /* path manager helpers */
> >>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> >>> index 8e050575a2d9..7a49454c77a6 100644
> >>> --- a/net/mptcp/protocol.h
> >>> +++ b/net/mptcp/protocol.h
> >>> @@ -422,6 +422,7 @@ struct mptcp_subflow_context {
> >>>               backup : 1,
> >>>               send_mp_prio : 1,
> >>>               send_mp_fail : 1,
> >>> +             mp_fail_need_echo : 1,
> >>
> >> I think mp_fail_expect_echo would be a more accurate name.
> >
> > Updated in v4.
> >
>
> Ok.
>
> >>
> >>>               rx_eof : 1,
> >>>               can_ack : 1,        /* only after processing the remote a key */
> >>>               disposable : 1;     /* ctx can be free at ulp release time */
> >>> @@ -594,6 +595,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
> >>>       inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
> >>> }
> >>>
> >>> +static inline bool mptcp_has_another_subflow_established(struct sock *ssk)
> >>> +{
> >>> +     struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
> >>> +     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> >>> +
> >>> +     mptcp_for_each_subflow(msk, tmp) {
> >>> +             if (tmp->fully_established && tmp != subflow)
> >>
> >> Why check tmp->fully_established here?
> >
> > I'll drop tmp->fully_established, and rename this function
> > mptcp_has_another_subflow to check whether there's a single subflow.
>
> Sounds good.
>
> >
> >
> > In my test, this MP_FAIL patchset only works in the multiple subflows case.
> > The receiver detected the bad checksum, sent RST + MP_FAIL to close this
> > subflow and discard the data with the bad checksum. Then the sender will
> > retransmit a new data with good checksum from other subflow. And the
> > receiver can resemble the whole data successfully.
> >
> > But the single subflow case dosen't work. I don't know how the new data
> > can be retransmitted from the sender side when it fallback to regular TCP.
> > So I'm not sure how to implement the single subflow case.
> >
> > Could you please give me some suggestions?
>
> Yes, the multiple subflow case is the simpler one this time. We can throw
> away the affected subflow and recover using the others.
>
> Since the non-contiguous, single subflow case has to close the subflow,
> that's more like the "easier" case.
>
> So, it's the infinite mapping case that's tricky. Have you looked at the
> multipath-tcp.org code? The important part seems to be using the
> information in the infinite mapping to determine what data to keep or
> throw away before beginning "fallback" behavior. I can look at the
> infinite mapping information in the RFC and see how that fits with our
> implementation tomorrow.
>

Thanks for your help.

I want to split this patchset into two part, "MP_FAIL support" and "the
infinite mapping support".

In the MP_FAIL part, just deal with the multiple subflows case, the easier
case. Include the MP_FAIL sending and MP_FAIL receiving code in the part.
This part had been implemented and tested. I'll send a v4 for it.

And put the single subflow case into the new "infinite mapping" part. I'll
add a new ticket on github to trace it, and send a RFC version for it later.
It seems that there's still a lot of work to do in this part.

Do you agree?

-Geliang

> - Mat
>
>
> >>
> >>> +                     return true;
> >>> +     }
> >>> +
> >>> +     return false;
> >>> +}
> >>> +
> >>> void __init mptcp_proto_init(void);
> >>> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> >>> int __init mptcp_proto_v6_init(void);
> >>> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> >>> index 0b5d4a3eadcd..46302208c474 100644
> >>> --- a/net/mptcp/subflow.c
> >>> +++ b/net/mptcp/subflow.c
> >>> @@ -913,6 +913,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
> >>>       csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
> >>>       if (unlikely(csum_fold(csum))) {
> >>>               MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
> >>> +             subflow->send_mp_fail = 1;
> >>> +             subflow->fail_seq = subflow->map_seq;
> >>>               return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
> >>>       }
> >>>
> >>> @@ -1160,6 +1162,22 @@ static bool subflow_check_data_avail(struct sock *ssk)
> >>>
> >>> fallback:
> >>>       /* RFC 8684 section 3.7. */
> >>> +     if (subflow->send_mp_fail) {
> >>> +             if (mptcp_has_another_subflow_established(ssk)) {
> >>> +                     mptcp_subflow_reset(ssk);
> >>> +                     while ((skb = skb_peek(&ssk->sk_receive_queue)))
> >>> +                             sk_eat_skb(ssk, skb);
> >>> +                     WRITE_ONCE(subflow->data_avail, 0);
> >>> +                     return true;
> >>> +             }
> >>> +
> >>> +             if (!msk->pm.subflows) {
> >>> +                     subflow->mp_fail_need_echo = 1;
> >>> +                     WRITE_ONCE(subflow->data_avail, 0);
> >>> +                     return true;
> >>> +             }
> >>> +     }
> >>> +
> >>>       if (subflow->mp_join || subflow->fully_established) {
> >>>               /* fatal protocol error, close the socket.
> >>>                * subflow_error_report() will introduce the appropriate barriers
> >>> --
> >>> 2.31.1
>
> --
> Mat Martineau
> Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending
  2021-06-28  4:28 ` [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending Geliang Tang
  2021-06-28  4:28   ` [MPTCP][PATCH v3 mptcp-next 2/8] mptcp: MP_FAIL suboption receiving Geliang Tang
  2021-07-07 23:07   ` [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending Mat Martineau
@ 2021-07-14  8:45   ` Paolo Abeni
  2021-07-14  9:16     ` Geliang Tang
  2 siblings, 1 reply; 21+ messages in thread
From: Paolo Abeni @ 2021-07-14  8:45 UTC (permalink / raw)
  To: Geliang Tang, mptcp

On Mon, 2021-06-28 at 12:28 +0800, Geliang Tang wrote:
> This patch added the MP_FAIL suboption sending support.
> 
> Add a new flag named send_mp_fail in struct mptcp_subflow_context. If
> this flag is set, send out MP_FAIL suboption.
> 
> Add a new member fail_seq in struct mptcp_out_options to save the data
> sequence number to put into the MP_FAIL suboption.
> 
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---
>  include/net/mptcp.h  |  1 +
>  net/mptcp/options.c  | 54 ++++++++++++++++++++++++++++++++++++++++++--
>  net/mptcp/protocol.h |  4 ++++
>  3 files changed, 57 insertions(+), 2 deletions(-)
> 
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index cb580b06152f..f48d3b5a3fd4 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -72,6 +72,7 @@ struct mptcp_out_options {
>  	u32 nonce;
>  	u64 thmac;
>  	u32 token;
> +	u64 fail_seq;

Since we can't have both a valid mapping and MP_FAIL in the same
packet, it would be better to avoid increasing the 'mptcp_out_options'
size, e.g. re-using some ext_copy field, or unioning with some other
field.

mptcp_out_options has grown quite a bit and we should really look into
shrinking it.

>  	u8 hmac[20];
>  	struct mptcp_ext ext_copy;
>  #endif
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index b5850afea343..b78defe1aed9 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -771,6 +771,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
>  	opts->reset_reason = subflow->reset_reason;
>  }
>  
> +static bool mptcp_established_options_mp_fail(struct sock *sk,
> +					      unsigned int *size,
> +					      unsigned int remaining,
> +					      struct mptcp_out_options *opts)
> +{
> +	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> +
> +	if (!subflow->send_mp_fail)
> +		return false;
> +
> +	if (remaining < TCPOLEN_MPTCP_FAIL)
> +		return false;
> +
> +	*size = TCPOLEN_MPTCP_FAIL;
> +	opts->suboptions |= OPTION_MPTCP_FAIL;
> +	opts->fail_seq = subflow->fail_seq;
> +
> +	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
> +
> +	return true;
> +}
> +
>  bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
>  			       unsigned int *size, unsigned int remaining,
>  			       struct mptcp_out_options *opts)
> @@ -787,15 +809,29 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
>  		return false;
>  
>  	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
> -		mptcp_established_options_rst(sk, skb, size, remaining, opts);
> +		if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> +			*size += opt_size;
> +			remaining -= opt_size;
> +		}
> +		mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
> +		*size += opt_size;
> +		remaining -= opt_size;
>  		return true;
>  	}
>  
>  	snd_data_fin = mptcp_data_fin_enabled(msk);
>  	if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
>  		ret = true;
> -	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
> +	else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
>  		ret = true;
> +		if (opts->ext_copy.use_ack) {

Is the extra check on 'opts->ext_copy.use_ack' necessary? can we just
look for mp_fail?


> +			if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> +				*size += opt_size;
> +				remaining -= opt_size;
> +				ret = true;
> +			}
> +		}
> +	}
>  
>  	/* we reserved enough space for the above options, and exceeding the
>  	 * TCP option space would be fatal
> @@ -1334,6 +1370,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
>  				      opts->backup, TCPOPT_NOP);
>  	}
>  
> +	if (OPTION_MPTCP_FAIL & opts->suboptions) {
> +		const struct sock *ssk = (const struct sock *)tp;
> +		struct mptcp_subflow_context *subflow;
> +
> +		subflow = mptcp_subflow_ctx(ssk);
> +		subflow->send_mp_fail = 0;
> +
> +		*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
> +				      TCPOLEN_MPTCP_FAIL,
> +				      0, 0);
> +		put_unaligned_be64(opts->fail_seq, ptr);
> +		ptr += 2;
> +	}
> +
>  	if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
>  		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
>  				      TCPOLEN_MPTCP_MPJ_SYN,
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 426ed80fe72f..007af5e4ba3d 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -26,6 +26,7 @@
>  #define OPTION_MPTCP_FASTCLOSE	BIT(8)
>  #define OPTION_MPTCP_PRIO	BIT(9)
>  #define OPTION_MPTCP_RST	BIT(10)
> +#define OPTION_MPTCP_FAIL	BIT(11)
>  
>  /* MPTCP option subtypes */
>  #define MPTCPOPT_MP_CAPABLE	0
> @@ -67,6 +68,7 @@
>  #define TCPOLEN_MPTCP_PRIO_ALIGN	4
>  #define TCPOLEN_MPTCP_FASTCLOSE		12
>  #define TCPOLEN_MPTCP_RST		4
> +#define TCPOLEN_MPTCP_FAIL		12
>  
>  #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
>  
> @@ -417,6 +419,7 @@ struct mptcp_subflow_context {
>  		mpc_map : 1,
>  		backup : 1,
>  		send_mp_prio : 1,
> +		send_mp_fail : 1,
>  		rx_eof : 1,
>  		can_ack : 1,        /* only after processing the remote a key */
>  		disposable : 1;	    /* ctx can be free at ulp release time */
> @@ -431,6 +434,7 @@ struct mptcp_subflow_context {
>  	u8	reset_seen:1;
>  	u8	reset_transient:1;
>  	u8	reset_reason:4;
> +	u64	fail_seq;
>  
>  	long	delegated_status;
>  	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending
  2021-07-14  8:45   ` Paolo Abeni
@ 2021-07-14  9:16     ` Geliang Tang
  2021-07-14 15:49       ` Paolo Abeni
  0 siblings, 1 reply; 21+ messages in thread
From: Geliang Tang @ 2021-07-14  9:16 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: mptcp

Hi Paolo,

Thanks for your review!

Paolo Abeni <pabeni@redhat.com> 于2021年7月14日周三 下午4:45写道：
>
> On Mon, 2021-06-28 at 12:28 +0800, Geliang Tang wrote:
> > This patch added the MP_FAIL suboption sending support.
> >
> > Add a new flag named send_mp_fail in struct mptcp_subflow_context. If
> > this flag is set, send out MP_FAIL suboption.
> >
> > Add a new member fail_seq in struct mptcp_out_options to save the data
> > sequence number to put into the MP_FAIL suboption.
> >
> > Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> > ---
> >  include/net/mptcp.h  |  1 +
> >  net/mptcp/options.c  | 54 ++++++++++++++++++++++++++++++++++++++++++--
> >  net/mptcp/protocol.h |  4 ++++
> >  3 files changed, 57 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > index cb580b06152f..f48d3b5a3fd4 100644
> > --- a/include/net/mptcp.h
> > +++ b/include/net/mptcp.h
> > @@ -72,6 +72,7 @@ struct mptcp_out_options {
> >       u32 nonce;
> >       u64 thmac;
> >       u32 token;
> > +     u64 fail_seq;
>
> Since we can't have both a valid mapping and MP_FAIL in the same
> packet, it would be better to avoid increasing the 'mptcp_out_options'
> size, e.g. re-using some ext_copy field, or unioning with some other
> field.

The RFC says an MP_FAIL option could be included in a RST or on the
subflow-level ACK. So I think we can't re-using the ext_copy field if it's
on the subflow-level ACK.

How about unioning this fail_seq field with ahmac field?

>
> mptcp_out_options has grown quite a bit and we should really look into
> shrinking it.
>
> >       u8 hmac[20];
> >       struct mptcp_ext ext_copy;
> >  #endif
> > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > index b5850afea343..b78defe1aed9 100644
> > --- a/net/mptcp/options.c
> > +++ b/net/mptcp/options.c
> > @@ -771,6 +771,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
> >       opts->reset_reason = subflow->reset_reason;
> >  }
> >
> > +static bool mptcp_established_options_mp_fail(struct sock *sk,
> > +                                           unsigned int *size,
> > +                                           unsigned int remaining,
> > +                                           struct mptcp_out_options *opts)
> > +{
> > +     struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> > +
> > +     if (!subflow->send_mp_fail)
> > +             return false;
> > +
> > +     if (remaining < TCPOLEN_MPTCP_FAIL)
> > +             return false;
> > +
> > +     *size = TCPOLEN_MPTCP_FAIL;
> > +     opts->suboptions |= OPTION_MPTCP_FAIL;
> > +     opts->fail_seq = subflow->fail_seq;
> > +
> > +     pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
> > +
> > +     return true;
> > +}
> > +
> >  bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> >                              unsigned int *size, unsigned int remaining,
> >                              struct mptcp_out_options *opts)
> > @@ -787,15 +809,29 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> >               return false;
> >
> >       if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
> > -             mptcp_established_options_rst(sk, skb, size, remaining, opts);
> > +             if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> > +                     *size += opt_size;
> > +                     remaining -= opt_size;
> > +             }
> > +             mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
> > +             *size += opt_size;
> > +             remaining -= opt_size;
> >               return true;
> >       }
> >
> >       snd_data_fin = mptcp_data_fin_enabled(msk);
> >       if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
> >               ret = true;
> > -     else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
> > +     else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
> >               ret = true;
> > +             if (opts->ext_copy.use_ack) {
>
> Is the extra check on 'opts->ext_copy.use_ack' necessary? can we just
> look for mp_fail?
>

I added this check since the RFC says an MP_FAIL option could be included
on the subflow-level ACK:

'''
In this case, if a receiver identifies a checksum failure when there is
only one path, it will send back an MP_FAIL option on the subflow-level
ACK, referring to the data-level sequence number of the start of the
segment on which the checksum error was detected.
'''

Do I understand right?

Thanks,
-Geliang


>
> > +                     if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> > +                             *size += opt_size;
> > +                             remaining -= opt_size;
> > +                             ret = true;
> > +                     }
> > +             }
> > +     }
> >
> >       /* we reserved enough space for the above options, and exceeding the
> >        * TCP option space would be fatal
> > @@ -1334,6 +1370,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
> >                                     opts->backup, TCPOPT_NOP);
> >       }
> >
> > +     if (OPTION_MPTCP_FAIL & opts->suboptions) {
> > +             const struct sock *ssk = (const struct sock *)tp;
> > +             struct mptcp_subflow_context *subflow;
> > +
> > +             subflow = mptcp_subflow_ctx(ssk);
> > +             subflow->send_mp_fail = 0;
> > +
> > +             *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
> > +                                   TCPOLEN_MPTCP_FAIL,
> > +                                   0, 0);
> > +             put_unaligned_be64(opts->fail_seq, ptr);
> > +             ptr += 2;
> > +     }
> > +
> >       if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
> >               *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
> >                                     TCPOLEN_MPTCP_MPJ_SYN,
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index 426ed80fe72f..007af5e4ba3d 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -26,6 +26,7 @@
> >  #define OPTION_MPTCP_FASTCLOSE       BIT(8)
> >  #define OPTION_MPTCP_PRIO    BIT(9)
> >  #define OPTION_MPTCP_RST     BIT(10)
> > +#define OPTION_MPTCP_FAIL    BIT(11)
> >
> >  /* MPTCP option subtypes */
> >  #define MPTCPOPT_MP_CAPABLE  0
> > @@ -67,6 +68,7 @@
> >  #define TCPOLEN_MPTCP_PRIO_ALIGN     4
> >  #define TCPOLEN_MPTCP_FASTCLOSE              12
> >  #define TCPOLEN_MPTCP_RST            4
> > +#define TCPOLEN_MPTCP_FAIL           12
> >
> >  #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM      (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
> >
> > @@ -417,6 +419,7 @@ struct mptcp_subflow_context {
> >               mpc_map : 1,
> >               backup : 1,
> >               send_mp_prio : 1,
> > +             send_mp_fail : 1,
> >               rx_eof : 1,
> >               can_ack : 1,        /* only after processing the remote a key */
> >               disposable : 1;     /* ctx can be free at ulp release time */
> > @@ -431,6 +434,7 @@ struct mptcp_subflow_context {
> >       u8      reset_seen:1;
> >       u8      reset_transient:1;
> >       u8      reset_reason:4;
> > +     u64     fail_seq;
> >
> >       long    delegated_status;
> >       struct  list_head delegated_node;   /* link into delegated_action, protected by local BH */
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending
  2021-07-14  9:16     ` Geliang Tang
@ 2021-07-14 15:49       ` Paolo Abeni
  0 siblings, 0 replies; 21+ messages in thread
From: Paolo Abeni @ 2021-07-14 15:49 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Wed, 2021-07-14 at 17:16 +0800, Geliang Tang wrote:
> Paolo Abeni <pabeni@redhat.com> 于2021年7月14日周三 下午4:45写道：
> > On Mon, 2021-06-28 at 12:28 +0800, Geliang Tang wrote:
> > > This patch added the MP_FAIL suboption sending support.
> > > 
> > > Add a new flag named send_mp_fail in struct mptcp_subflow_context. If
> > > this flag is set, send out MP_FAIL suboption.
> > > 
> > > Add a new member fail_seq in struct mptcp_out_options to save the data
> > > sequence number to put into the MP_FAIL suboption.
> > > 
> > > Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> > > ---
> > >  include/net/mptcp.h  |  1 +
> > >  net/mptcp/options.c  | 54 ++++++++++++++++++++++++++++++++++++++++++--
> > >  net/mptcp/protocol.h |  4 ++++
> > >  3 files changed, 57 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > > index cb580b06152f..f48d3b5a3fd4 100644
> > > --- a/include/net/mptcp.h
> > > +++ b/include/net/mptcp.h
> > > @@ -72,6 +72,7 @@ struct mptcp_out_options {
> > >       u32 nonce;
> > >       u64 thmac;
> > >       u32 token;
> > > +     u64 fail_seq;
> > 
> > Since we can't have both a valid mapping and MP_FAIL in the same
> > packet, it would be better to avoid increasing the 'mptcp_out_options'
> > size, e.g. re-using some ext_copy field, or unioning with some other
> > field.
> 
> The RFC says an MP_FAIL option could be included in a RST or on the
> subflow-level ACK. So I think we can't re-using the ext_copy field if it's
> on the subflow-level ACK.
> 
> How about unioning this fail_seq field with ahmac field?
> 
> > mptcp_out_options has grown quite a bit and we should really look into
> > shrinking it.
> > 
> > >       u8 hmac[20];
> > >       struct mptcp_ext ext_copy;
> > >  #endif
> > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > > index b5850afea343..b78defe1aed9 100644
> > > --- a/net/mptcp/options.c
> > > +++ b/net/mptcp/options.c
> > > @@ -771,6 +771,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
> > >       opts->reset_reason = subflow->reset_reason;
> > >  }
> > > 
> > > +static bool mptcp_established_options_mp_fail(struct sock *sk,
> > > +                                           unsigned int *size,
> > > +                                           unsigned int remaining,
> > > +                                           struct mptcp_out_options *opts)
> > > +{
> > > +     struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> > > +
> > > +     if (!subflow->send_mp_fail)
> > > +             return false;
> > > +
> > > +     if (remaining < TCPOLEN_MPTCP_FAIL)
> > > +             return false;
> > > +
> > > +     *size = TCPOLEN_MPTCP_FAIL;
> > > +     opts->suboptions |= OPTION_MPTCP_FAIL;
> > > +     opts->fail_seq = subflow->fail_seq;
> > > +
> > > +     pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
> > > +
> > > +     return true;
> > > +}
> > > +
> > >  bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> > >                              unsigned int *size, unsigned int remaining,
> > >                              struct mptcp_out_options *opts)
> > > @@ -787,15 +809,29 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> > >               return false;
> > > 
> > >       if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
> > > -             mptcp_established_options_rst(sk, skb, size, remaining, opts);
> > > +             if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> > > +                     *size += opt_size;
> > > +                     remaining -= opt_size;
> > > +             }
> > > +             mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
> > > +             *size += opt_size;
> > > +             remaining -= opt_size;
> > >               return true;
> > >       }
> > > 
> > >       snd_data_fin = mptcp_data_fin_enabled(msk);
> > >       if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
> > >               ret = true;
> > > -     else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
> > > +     else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
> > >               ret = true;
> > > +             if (opts->ext_copy.use_ack) {
> > 
> > Is the extra check on 'opts->ext_copy.use_ack' necessary? can we just
> > look for mp_fail?
> > 
> 
> I added this check since the RFC says an MP_FAIL option could be included
> on the subflow-level ACK:
> 
> '''
> In this case, if a receiver identifies a checksum failure when there is
> only one path, it will send back an MP_FAIL option on the subflow-level
> ACK, referring to the data-level sequence number of the start of the
> segment on which the checksum error was detected.
> '''
> 
> Do I understand right?

Not a big deal, but I read the above as follow:

the MP_FAIL is included into the next subflow packet/

subflow-level ACK really means TCP-level ACK, I think. Since the
subflow is established (at TCP level) each TCP packet will carry an
ack.

All in all the ' if (opts->ext_copy.use_ack) {' check looks unneeded to
me.

Cheers,

Paolo


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail
  2021-07-14  3:56             ` Geliang Tang
@ 2021-07-14 17:48               ` Mat Martineau
  0 siblings, 0 replies; 21+ messages in thread
From: Mat Martineau @ 2021-07-14 17:48 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

[-- Attachment #1: Type: text/plain, Size: 8478 bytes --]

On Wed, 14 Jul 2021, Geliang Tang wrote:

> Hi Mat,
>
> Mat Martineau <mathew.j.martineau@linux.intel.com> 于2021年7月14日周三 上午4:35写道：
>>
>> On Tue, 13 Jul 2021, Geliang Tang wrote:
>>
>>> Hi Mat,
>>>
>>> Sorry for late response on this.
>>>
>>> Mat Martineau <mathew.j.martineau@linux.intel.com> 于2021年7月8日周四 上午7:20写道：
>>>>
>>>> On Mon, 28 Jun 2021, Geliang Tang wrote:
>>>>
>>>>> When a bad checksum is detected, send out the MP_FAIL option.
>>>>>
>>>>> When multiple subflows are in use, close the affected subflow with a RST
>>>>> that includes an MP_FAIL option.
>>>>>
>>>>> When a single subfow is in use, send back an MP_FAIL option on the
>>>>> subflow-level ACK. And the receiver of this MP_FAIL respond with an
>>>>> MP_FAIL in the reverse direction.
>>>>>
>>>>> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
>>>>> ---
>>>>> net/mptcp/pm.c       | 10 ++++++++++
>>>>> net/mptcp/protocol.h | 14 ++++++++++++++
>>>>> net/mptcp/subflow.c  | 18 ++++++++++++++++++
>>>>> 3 files changed, 42 insertions(+)
>>>>>
>>>>> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
>>>>> index d4c19420194a..c34c9c0b2fa5 100644
>>>>> --- a/net/mptcp/pm.c
>>>>> +++ b/net/mptcp/pm.c
>>>>> @@ -250,8 +250,18 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
>>>>> void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
>>>>> {
>>>>>       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
>>>>> +     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
>>>>>
>>>>>       pr_debug("map_seq=%llu fail_seq=%llu", subflow->map_seq, fail_seq);
>>>>> +
>>>>> +     if (!msk->pm.subflows) {
>>>>
>>>> The pm.lock isn't held so it's not safe to access pm.subflows
>>>>
>>>> I don't think it's sufficient to read pm.subflows with the lock or add
>>>> READ_ONCE/WRITE_ONCE, since that would still allow race conditions with
>>>> the msk. To handle fallback when receiving MP_FAIL I think the
>>>> sock_owned_by_user() checks and delegated callback (similar to
>>>> mptcp_subflow_process_delegated()) may be needed.
>>>>
>>>
>>> I think it's better to use the below mptcp_has_another_subflow() function
>>> here instead of using msk->pm.subflows to check the single subflow case.
>>>
>>
>> Yes, that sounds good.
>>
>>>>> +             if (!subflow->mp_fail_need_echo) {
>>>>> +                     subflow->send_mp_fail = 1;
>>>>> +                     subflow->fail_seq = fail_seq;
>>>>
>>>> Echoing the fail_seq back doesn't seem correct, from the RFC it seems like
>>>> this side should send a sequence number for the opposite data direction?
>>>> Do you agree?
>>>
>>> So we should use 'subflow->fail_seq = subflow->map_seq;' here?
>>
>> I think so - but I'm concerned about the possibility of out-of-order data
>> and checking for single- or multiple-subflow cases. I need to look at the
>> RFC some more to tell for sure.
>>
>>>
>>>>
>>>>> +             } else {
>>>>> +                     subflow->mp_fail_need_echo = 0;
>>>>> +             }
>>>>> +     }
>>>>> }
>>>>>
>>>>> /* path manager helpers */
>>>>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>>>>> index 8e050575a2d9..7a49454c77a6 100644
>>>>> --- a/net/mptcp/protocol.h
>>>>> +++ b/net/mptcp/protocol.h
>>>>> @@ -422,6 +422,7 @@ struct mptcp_subflow_context {
>>>>>               backup : 1,
>>>>>               send_mp_prio : 1,
>>>>>               send_mp_fail : 1,
>>>>> +             mp_fail_need_echo : 1,
>>>>
>>>> I think mp_fail_expect_echo would be a more accurate name.
>>>
>>> Updated in v4.
>>>
>>
>> Ok.
>>
>>>>
>>>>>               rx_eof : 1,
>>>>>               can_ack : 1,        /* only after processing the remote a key */
>>>>>               disposable : 1;     /* ctx can be free at ulp release time */
>>>>> @@ -594,6 +595,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
>>>>>       inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
>>>>> }
>>>>>
>>>>> +static inline bool mptcp_has_another_subflow_established(struct sock *ssk)
>>>>> +{
>>>>> +     struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
>>>>> +     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
>>>>> +
>>>>> +     mptcp_for_each_subflow(msk, tmp) {
>>>>> +             if (tmp->fully_established && tmp != subflow)
>>>>
>>>> Why check tmp->fully_established here?
>>>
>>> I'll drop tmp->fully_established, and rename this function
>>> mptcp_has_another_subflow to check whether there's a single subflow.
>>
>> Sounds good.
>>
>>>
>>>
>>> In my test, this MP_FAIL patchset only works in the multiple subflows case.
>>> The receiver detected the bad checksum, sent RST + MP_FAIL to close this
>>> subflow and discard the data with the bad checksum. Then the sender will
>>> retransmit a new data with good checksum from other subflow. And the
>>> receiver can resemble the whole data successfully.
>>>
>>> But the single subflow case dosen't work. I don't know how the new data
>>> can be retransmitted from the sender side when it fallback to regular TCP.
>>> So I'm not sure how to implement the single subflow case.
>>>
>>> Could you please give me some suggestions?
>>
>> Yes, the multiple subflow case is the simpler one this time. We can throw
>> away the affected subflow and recover using the others.
>>
>> Since the non-contiguous, single subflow case has to close the subflow,
>> that's more like the "easier" case.
>>
>> So, it's the infinite mapping case that's tricky. Have you looked at the
>> multipath-tcp.org code? The important part seems to be using the
>> information in the infinite mapping to determine what data to keep or
>> throw away before beginning "fallback" behavior. I can look at the
>> infinite mapping information in the RFC and see how that fits with our
>> implementation tomorrow.
>>
>
> Thanks for your help.
>
> I want to split this patchset into two part, "MP_FAIL support" and "the
> infinite mapping support".
>
> In the MP_FAIL part, just deal with the multiple subflows case, the easier
> case. Include the MP_FAIL sending and MP_FAIL receiving code in the part.
> This part had been implemented and tested. I'll send a v4 for it.
>
> And put the single subflow case into the new "infinite mapping" part. I'll
> add a new ticket on github to trace it, and send a RFC version for it later.
> It seems that there's still a lot of work to do in this part.
>
> Do you agree?

Yes, I think it would help to separate the single-subflow case like that. 
Thanks!

Mat


>>>>
>>>>> +                     return true;
>>>>> +     }
>>>>> +
>>>>> +     return false;
>>>>> +}
>>>>> +
>>>>> void __init mptcp_proto_init(void);
>>>>> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>>>>> int __init mptcp_proto_v6_init(void);
>>>>> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
>>>>> index 0b5d4a3eadcd..46302208c474 100644
>>>>> --- a/net/mptcp/subflow.c
>>>>> +++ b/net/mptcp/subflow.c
>>>>> @@ -913,6 +913,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
>>>>>       csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
>>>>>       if (unlikely(csum_fold(csum))) {
>>>>>               MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
>>>>> +             subflow->send_mp_fail = 1;
>>>>> +             subflow->fail_seq = subflow->map_seq;
>>>>>               return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
>>>>>       }
>>>>>
>>>>> @@ -1160,6 +1162,22 @@ static bool subflow_check_data_avail(struct sock *ssk)
>>>>>
>>>>> fallback:
>>>>>       /* RFC 8684 section 3.7. */
>>>>> +     if (subflow->send_mp_fail) {
>>>>> +             if (mptcp_has_another_subflow_established(ssk)) {
>>>>> +                     mptcp_subflow_reset(ssk);
>>>>> +                     while ((skb = skb_peek(&ssk->sk_receive_queue)))
>>>>> +                             sk_eat_skb(ssk, skb);
>>>>> +                     WRITE_ONCE(subflow->data_avail, 0);
>>>>> +                     return true;
>>>>> +             }
>>>>> +
>>>>> +             if (!msk->pm.subflows) {
>>>>> +                     subflow->mp_fail_need_echo = 1;
>>>>> +                     WRITE_ONCE(subflow->data_avail, 0);
>>>>> +                     return true;
>>>>> +             }
>>>>> +     }
>>>>> +
>>>>>       if (subflow->mp_join || subflow->fully_established) {
>>>>>               /* fatal protocol error, close the socket.
>>>>>                * subflow_error_report() will introduce the appropriate barriers
>>>>> --
>>>>> 2.31.1

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2021-07-14 17:48 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-28  4:28 [MPTCP][PATCH v3 mptcp-next 0/8] MP_FAIL support Geliang Tang
2021-06-28  4:28 ` [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending Geliang Tang
2021-06-28  4:28   ` [MPTCP][PATCH v3 mptcp-next 2/8] mptcp: MP_FAIL suboption receiving Geliang Tang
2021-06-28  4:28     ` [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail Geliang Tang
2021-06-28  4:29       ` [MPTCP][PATCH v3 mptcp-next 4/8] mptcp: add the mibs for MP_FAIL Geliang Tang
2021-06-28  4:29         ` [MPTCP][PATCH v3 mptcp-next 5/8] selftests: mptcp: add MP_FAIL mibs check Geliang Tang
2021-06-28  4:29           ` [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending Geliang Tang
2021-06-28  4:29             ` [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving Geliang Tang
2021-06-28  4:29               ` [MPTCP][PATCH v3 mptcp-next 8/8] mptcp: add a mib for the infinite mapping sending Geliang Tang
2021-07-07 23:49               ` [MPTCP][PATCH v3 mptcp-next 7/8] mptcp: infinite mapping receiving Mat Martineau
2021-07-07 23:44             ` [MPTCP][PATCH v3 mptcp-next 6/8] mptcp: infinite mapping sending Mat Martineau
2021-07-08  0:44               ` Mat Martineau
2021-07-07 23:20       ` [MPTCP][PATCH v3 mptcp-next 3/8] mptcp: send out MP_FAIL when data checksum fail Mat Martineau
2021-07-13 12:44         ` Geliang Tang
2021-07-13 20:35           ` Mat Martineau
2021-07-14  3:56             ` Geliang Tang
2021-07-14 17:48               ` Mat Martineau
2021-07-07 23:07   ` [MPTCP][PATCH v3 mptcp-next 1/8] mptcp: MP_FAIL suboption sending Mat Martineau
2021-07-14  8:45   ` Paolo Abeni
2021-07-14  9:16     ` Geliang Tang
2021-07-14 15:49       ` Paolo Abeni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).