mptcp.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [MPTCP] [MPTCP][RFC PATCH 0/2] DSS checksum and MP_FAIL support
@ 2021-03-10 12:39 Geliang Tang
  2021-03-10 12:39 ` [MPTCP] [MPTCP][RFC PATCH 1/2] mptcp: DSS checksum support Geliang Tang
  0 siblings, 1 reply; 4+ messages in thread
From: Geliang Tang @ 2021-03-10 12:39 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 744 bytes --]

This is the first RFC version adding DSS checksum and MP_FAIL support,
addressing issues #134 and #52. It's not finished yet. Patch 1 has a bug in
it: the self-test script mptcp_join.sh passes, but mptcp_connect.sh
sometimes fails. Patch 2 doesn't implement the logic for receiving
MP_FAIL; I marked that spot with a TODO label.
 
I need some suggestions to improve this patchset.

Thanks.

Geliang Tang (2):
  mptcp: DSS checksum support
  mptcp: add MP_FAIL support

 include/net/mptcp.h  |   2 +
 net/mptcp/options.c  | 122 +++++++++++++++++++++++++++++++++++++++----
 net/mptcp/protocol.h |  13 +++++
 net/mptcp/subflow.c  |  42 ++++++++++++++-
 4 files changed, 169 insertions(+), 10 deletions(-)

-- 
2.29.2

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [MPTCP] [MPTCP][RFC PATCH 1/2] mptcp: DSS checksum support
@ 2021-03-10 12:39 ` Geliang Tang
  2021-03-10 12:39   ` [MPTCP] [MPTCP][RFC PATCH 2/2] mptcp: add MP_FAIL support Geliang Tang
  0 siblings, 1 reply; 4+ messages in thread
From: Geliang Tang @ 2021-03-10 12:39 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 6995 bytes --]

Add DSS checksum support.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/134

Signed-off-by: Geliang Tang <geliangtang(a)gmail.com>
---
 include/net/mptcp.h  |  1 +
 net/mptcp/options.c  | 61 ++++++++++++++++++++++++++++++++++++++------
 net/mptcp/protocol.h |  8 ++++++
 net/mptcp/subflow.c  | 38 ++++++++++++++++++++++++++-
 4 files changed, 99 insertions(+), 9 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 16fe34d139c3..de88f38e60b1 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -32,6 +32,7 @@ struct mptcp_ext {
 			frozen:1,
 			reset_transient:1;
 	u8		reset_reason:4;
+	u16		csum;
 };
 
 #define MPTCP_RM_IDS_MAX	8
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index bf1b8497e091..9df26291cf9a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -69,11 +69,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		 * "If a checksum is not present when its use has been
 		 * negotiated, the receiver MUST close the subflow with a RST as
 		 * it is considered broken."
-		 *
-		 * We don't implement DSS checksum - fall back to TCP.
 		 */
 		if (flags & MPTCP_CAP_CHECKSUM_REQD)
-			break;
+			;
 
 		mp_opt->mp_capable = 1;
 		if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
@@ -208,9 +206,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			mp_opt->data_len = get_unaligned_be16(ptr);
 			ptr += 2;
 
-			pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
+			if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+				mp_opt->csum = get_unaligned_be16(ptr);
+				ptr += 2;
+			}
+
+			pr_debug("%s data_seq=%llu subflow_seq=%u data_len=%u csum=%u", __func__,
 				 mp_opt->data_seq, mp_opt->subflow_seq,
-				 mp_opt->data_len);
+				 mp_opt->data_len, mp_opt->csum);
 		}
 
 		break;
@@ -340,6 +343,7 @@ void mptcp_get_options(const struct sk_buff *skb,
 	mp_opt->dss = 0;
 	mp_opt->mp_prio = 0;
 	mp_opt->reset = 0;
+	mp_opt->csum = 0;
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
@@ -520,6 +524,34 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
 	}
 }
 
+static u16 mptcp_generate_dss_csum(struct sk_buff *skb)
+{
+	struct mptcp_ext *mpext;
+
+	if (!skb)
+		return 0;
+
+	mpext = mptcp_get_ext(skb);
+	if (mpext && mpext->use_map) {
+		struct csum_pseudo_header header;
+		__wsum csum;
+
+		header.data_seq = mpext->data_seq;
+		header.subflow_seq = mpext->subflow_seq;
+		header.data_len = mpext->data_len;
+		header.csum = 0;
+
+		csum = skb_checksum(skb, 0, skb->len, 0);
+		csum = csum_partial(&header, sizeof(header), csum);
+
+		pr_debug("%s data_seq=%llu subflow_seq=%u data_len=%u csum=%u\n",
+			 __func__, header.data_seq, header.subflow_seq, header.data_len, csum_fold(csum));
+		return csum_fold(csum);
+	}
+
+	return 0;
+}
+
 static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 					  bool snd_data_fin_enable,
 					  unsigned int *size,
@@ -543,8 +575,10 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 
 		remaining -= map_size;
 		dss_size = map_size;
-		if (mpext)
+		if (mpext) {
+			mpext->csum = mptcp_generate_dss_csum(skb);
 			opts->ext_copy = *mpext;
+		}
 
 		if (skb && snd_data_fin_enable)
 			mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
@@ -1141,6 +1175,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		}
 		mpext->data_len = mp_opt.data_len;
 		mpext->use_map = 1;
+
+		if (!subflow->mpc_map)
+			mpext->csum = mp_opt.csum;
 	}
 }
 
@@ -1349,6 +1386,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 			flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
 			if (mpext->data_fin)
 				flags |= MPTCP_DSS_DATA_FIN;
+
+			if (mpext->csum)
+				len += TCPOLEN_MPTCP_DSS_CHECKSUM;
 		}
 
 		*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
@@ -1368,8 +1408,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 			ptr += 2;
 			put_unaligned_be32(mpext->subflow_seq, ptr);
 			ptr += 1;
-			put_unaligned_be32(mpext->data_len << 16 |
-					   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+			if (mpext->csum) {
+				put_unaligned_be32(mpext->data_len << 16 |
+						   mpext->csum, ptr);
+			} else {
+				put_unaligned_be32(mpext->data_len << 16 |
+						   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+			}
 		}
 	}
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f9dcf49ffe33..24b4e1f6d23f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -126,6 +126,7 @@ struct mptcp_options_received {
 	u64	data_seq;
 	u32	subflow_seq;
 	u16	data_len;
+	u16	csum;
 	u16	mp_capable : 1,
 		mp_join : 1,
 		fastclose : 1,
@@ -356,6 +357,13 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
 	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
 }
 
+struct csum_pseudo_header {
+	u64 data_seq;
+	u32 subflow_seq;
+	u16 data_len;
+	u16 csum;
+};
+
 struct mptcp_subflow_request_sock {
 	struct	tcp_request_sock sk;
 	u16	mp_capable : 1,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bedbae99df2c..b597811a2f8d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -796,6 +796,42 @@ static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
 					  mptcp_subflow_get_map_offset(subflow);
 }
 
+static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	struct csum_pseudo_header header;
+	struct mptcp_ext *mpext;
+	__wsum csum;
+
+	if (subflow->mpc_map)
+		goto out;
+	if (!skb)
+		goto out;
+
+	mpext = mptcp_get_ext(skb);
+	if (mpext && mpext->use_map && mpext->csum) {
+		header.data_seq = subflow->map_seq;
+		header.subflow_seq = subflow->map_subflow_seq;
+		header.data_len = subflow->map_data_len;
+		header.csum = mpext->csum;
+
+		csum = skb_checksum(skb, 0, skb->len, 0);
+		csum = csum_partial(&header, sizeof(header), csum);
+
+		pr_debug("%s data_seq=%llu subflow_seq=%u data_len=%u csum=%u",
+			 __func__, header.data_seq, header.subflow_seq, header.data_len, header.csum);
+
+		if (csum_fold(csum)) {
+			pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum));
+			return true; //false;
+		}
+		pr_debug("%s DSS checksum done", __func__);
+	}
+
+out:
+	return true;
+}
+
 static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -814,7 +850,7 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
 		warn_bad_map(subflow, ssn + skb->len);
 		return false;
 	}
-	return true;
+	return validate_dss_csum(ssk, skb);
 }
 
 static enum mapping_status get_mapping_status(struct sock *ssk,
-- 
2.29.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [MPTCP] [MPTCP][RFC PATCH 2/2] mptcp: add MP_FAIL support
@ 2021-03-10 12:39   ` Geliang Tang
       [not found]     ` <974f96db-7160-793e-6d9a-ca919cb74a8@linux.intel.com>
  0 siblings, 1 reply; 4+ messages in thread
From: Geliang Tang @ 2021-03-10 12:39 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 5909 bytes --]

Add handling for sending and receiving MP_FAIL suboption.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52

Signed-off-by: Geliang Tang <geliangtang(a)gmail.com>
---
 include/net/mptcp.h  |  1 +
 net/mptcp/options.c  | 61 +++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/protocol.h |  5 ++++
 net/mptcp/subflow.c  |  4 +++
 4 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index de88f38e60b1..6635689ce03f 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -64,6 +64,7 @@ struct mptcp_out_options {
 	u32 nonce;
 	u64 thmac;
 	u32 token;
+	u64 fail_seq;
 	u8 hmac[20];
 	struct mptcp_ext ext_copy;
 #endif
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9df26291cf9a..1b5aaab80ba0 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -318,6 +318,17 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		flags = *ptr++;
 		mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
 		mp_opt->reset_reason = *ptr;
+		pr_debug("RST: reset_reason=%u", mp_opt->reset_reason);
+		break;
+
+	case MPTCPOPT_MP_FAIL:
+		if (opsize != TCPOLEN_MPTCP_FAIL)
+			break;
+
+		ptr += 2;
+		mp_opt->mp_fail = 1;
+		mp_opt->fail_seq = get_unaligned_be64(ptr);
+		pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
 		break;
 
 	default:
@@ -344,6 +355,7 @@ void mptcp_get_options(const struct sk_buff *skb,
 	mp_opt->mp_prio = 0;
 	mp_opt->reset = 0;
 	mp_opt->csum = 0;
+	mp_opt->mp_fail = 0;
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
@@ -787,6 +799,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
 	opts->reset_reason = subflow->reset_reason;
 }
 
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+					      unsigned int *size,
+					      unsigned int remaining,
+					      struct mptcp_out_options *opts)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	if (!subflow->send_mp_fail)
+		return false;
+
+	if (remaining < TCPOLEN_MPTCP_FAIL)
+		return false;
+
+	*size = TCPOLEN_MPTCP_FAIL;
+	opts->suboptions |= OPTION_MPTCP_FAIL;
+	opts->fail_seq = subflow->map_seq;
+
+	pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+	return true;
+}
+
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 			       unsigned int *size, unsigned int remaining,
 			       struct mptcp_out_options *opts)
@@ -803,7 +837,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 		return false;
 
 	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
-		mptcp_established_options_rst(sk, skb, size, remaining, opts);
+		if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+			*size += opt_size;
+			remaining -= opt_size;
+		}
+		mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
+		*size += opt_size;
+		remaining -= opt_size;
 		return true;
 	}
 
@@ -1120,6 +1160,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		mp_opt.mp_prio = 0;
 	}
 
+	if (mp_opt.mp_fail) {
+		/* TODO */
+		mp_opt.mp_fail = 0;
+	}
+
 	if (mp_opt.reset) {
 		subflow->reset_seen = 1;
 		subflow->reset_reason = mp_opt.reset_reason;
@@ -1329,6 +1374,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 				      opts->backup, TCPOPT_NOP);
 	}
 
+	if (OPTION_MPTCP_FAIL & opts->suboptions) {
+		const struct sock *ssk = (const struct sock *)tp;
+		struct mptcp_subflow_context *subflow;
+
+		subflow = mptcp_subflow_ctx(ssk);
+		subflow->send_mp_fail = 0;
+
+		*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+				      TCPOLEN_MPTCP_FAIL,
+				      0, 0);
+		put_unaligned_be64(opts->fail_seq, ptr);
+		ptr += 2;
+	}
+
 	if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
 		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
 				      TCPOLEN_MPTCP_MPJ_SYN,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 24b4e1f6d23f..ab8f92c49029 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -27,6 +27,7 @@
 #define OPTION_MPTCP_FASTCLOSE	BIT(9)
 #define OPTION_MPTCP_PRIO	BIT(10)
 #define OPTION_MPTCP_RST	BIT(11)
+#define OPTION_MPTCP_FAIL	BIT(12)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE	0
@@ -68,6 +69,7 @@
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
 #define TCPOLEN_MPTCP_FASTCLOSE		12
 #define TCPOLEN_MPTCP_RST		4
+#define TCPOLEN_MPTCP_FAIL		12
 
 /* MPTCP MP_JOIN flags */
 #define MPTCPOPT_BACKUP		BIT(0)
@@ -135,6 +137,7 @@ struct mptcp_options_received {
 		add_addr : 1,
 		rm_addr : 1,
 		mp_prio : 1,
+		mp_fail : 1,
 		family : 4,
 		echo : 1,
 		backup : 1;
@@ -162,6 +165,7 @@ struct mptcp_options_received {
 	u16	port;
 	u8	reset_reason:4;
 	u8	reset_transient:1;
+	u64	fail_seq;
 };
 
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -428,6 +432,7 @@ struct mptcp_subflow_context {
 		mpc_map : 1,
 		backup : 1,
 		send_mp_prio : 1,
+		send_mp_fail : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
 		disposable : 1;	    /* ctx can be free at ulp release time */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b597811a2f8d..059c1a0ef25b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -823,12 +823,16 @@ static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb)
 
 		if (csum_fold(csum)) {
 			pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum));
+			subflow->send_mp_fail = 1;
+			subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+			tcp_send_active_reset(ssk, GFP_ATOMIC);
 			return true; //false;
 		}
 		pr_debug("%s DSS checksum done", __func__);
 	}
 
 out:
+	subflow->send_mp_fail = 0;
 	return true;
 }
 
-- 
2.29.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [MPTCP] [MPTCP][RFC PATCH 2/2] mptcp: add MP_FAIL support
       [not found]     ` <974f96db-7160-793e-6d9a-ca919cb74a8@linux.intel.com>
@ 2021-06-21 11:53       ` Geliang Tang
  0 siblings, 0 replies; 4+ messages in thread
From: Geliang Tang @ 2021-06-21 11:53 UTC (permalink / raw)
  To: Mat Martineau; +Cc: Christoph Paasch, mptcp

Hi Mat,

Mat Martineau <mathew.j.martineau@linux.intel.com> 于2021年3月20日周六 上午9:35写道:
>
> On Wed, 10 Mar 2021, Geliang Tang wrote:
>
> > Add handling for sending and receiving MP_FAIL suboption.
> >
> > Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52
> >
> > Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> > ---
> > include/net/mptcp.h  |  1 +
> > net/mptcp/options.c  | 61 +++++++++++++++++++++++++++++++++++++++++++-
> > net/mptcp/protocol.h |  5 ++++
> > net/mptcp/subflow.c  |  4 +++
> > 4 files changed, 70 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > index de88f38e60b1..6635689ce03f 100644
> > --- a/include/net/mptcp.h
> > +++ b/include/net/mptcp.h
> > @@ -64,6 +64,7 @@ struct mptcp_out_options {
> >       u32 nonce;
> >       u64 thmac;
> >       u32 token;
> > +     u64 fail_seq;
> >       u8 hmac[20];
> >       struct mptcp_ext ext_copy;
> > #endif
> > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > index 9df26291cf9a..1b5aaab80ba0 100644
> > --- a/net/mptcp/options.c
> > +++ b/net/mptcp/options.c
> > @@ -318,6 +318,17 @@ static void mptcp_parse_option(const struct sk_buff *skb,
> >               flags = *ptr++;
> >               mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
> >               mp_opt->reset_reason = *ptr;
> > +             pr_debug("RST: reset_reason=%u", mp_opt->reset_reason);
> > +             break;
> > +
> > +     case MPTCPOPT_MP_FAIL:
> > +             if (opsize != TCPOLEN_MPTCP_FAIL)
> > +                     break;
> > +
> > +             ptr += 2;
> > +             mp_opt->mp_fail = 1;
> > +             mp_opt->fail_seq = get_unaligned_be64(ptr);
> > +             pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
> >               break;
> >
> >       default:
> > @@ -344,6 +355,7 @@ void mptcp_get_options(const struct sk_buff *skb,
> >       mp_opt->mp_prio = 0;
> >       mp_opt->reset = 0;
> >       mp_opt->csum = 0;
> > +     mp_opt->mp_fail = 0;
> >
> >       length = (th->doff * 4) - sizeof(struct tcphdr);
> >       ptr = (const unsigned char *)(th + 1);
> > @@ -787,6 +799,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
> >       opts->reset_reason = subflow->reset_reason;
> > }
> >
> > +static bool mptcp_established_options_mp_fail(struct sock *sk,
> > +                                           unsigned int *size,
> > +                                           unsigned int remaining,
> > +                                           struct mptcp_out_options *opts)
> > +{
> > +     struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> > +
> > +     if (!subflow->send_mp_fail)
> > +             return false;
> > +
> > +     if (remaining < TCPOLEN_MPTCP_FAIL)
> > +             return false;
> > +
> > +     *size = TCPOLEN_MPTCP_FAIL;
> > +     opts->suboptions |= OPTION_MPTCP_FAIL;
> > +     opts->fail_seq = subflow->map_seq;
> > +
> > +     pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
> > +
> > +     return true;
> > +}
> > +
> > bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> >                              unsigned int *size, unsigned int remaining,
> >                              struct mptcp_out_options *opts)
> > @@ -803,7 +837,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> >               return false;
> >
> >       if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
> > -             mptcp_established_options_rst(sk, skb, size, remaining, opts);
> > +             if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> > +                     *size += opt_size;
> > +                     remaining -= opt_size;
> > +             }
> > +             mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
> > +             *size += opt_size;
> > +             remaining -= opt_size;
> >               return true;
> >       }
> >
> > @@ -1120,6 +1160,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
> >               mp_opt.mp_prio = 0;
> >       }
> >
> > +     if (mp_opt.mp_fail) {
> > +             /* TODO */
>
> It's not clear to me what RFC 8684 requires here.
>
> If Peer A detects the bad checksum, it sends MP_FAIL with fail_seq
> matching subflow->map_seq for the mapping being received (which the code
> above does).
>
> The RFC says the recipient of the option (Peer B) must discard data after
> fail_seq - but it's the transmitter of the data? It's very confusingly
> worded. I looked at the multipath-tcp.org kernel and the MP_FAIL sequence
> number appears to be ignored by the receiver.

After comparing RFC 8684 with RFC 6824, I think I have found the answer to
this question. It should be 'peer A must discard the data, not peer B'.

When multiple subflows are in use, if peer A detects the bad checksum, it
sends MP_FAIL+RST to close this subflow, and it discards data after
fail_seq.

When peer B receives this MP_FAIL+RST, it just ignores the MP_FAIL sequence
number and does nothing. The fail_seq is only used in the single-subflow
case, to send the infinite mapping. In the multiple-subflow case, the
fail_seq is simply ignored.

I think the MP_FAIL logic should be like this:

 * Peer B sends a DSS to peer A, and the data has been modified by
middleboxes; peer A then detects the bad checksum.

 * In the multiple-subflow case, peer A sends MP_FAIL+RST back to peer B,
and peer A discards the data following the bad data sequence number. Peer
B receives this MP_FAIL+RST and closes this subflow.

 * In the single-subflow case, using the simple implementation, peer A
sends MP_FAIL back to peer B, and peer A falls back to regular TCP. Peer
B receives this MP_FAIL and also falls back to regular TCP.

Have I understood this correctly?

-Geliang

>
> So, maybe this is the right thing to do:
>
>   * When checksums are enabled, do not send DATA_ACK for received data
> until the full mapping is received and the checksum is verified. This
> allows retransmission of any data that's part of the bad checksum.
>
>   * When a bad checksum is detected, send the MP_FAIL and watch for the
> MP_FAIL reply. I'm not sure of the conditions for resending MP_FAIL.
>
>   * When MP_FAIL is received, send MP_FAIL+RST for the mapping currently
> being received on that subflow and fall back if the in-order-single-subflow
> conditions in the RFC are met.
>
> Christoph, is that on the right track? (If so, is it time for an errata
> submission? :) )
>
>
> - Mat
>
>
>
> > +             mp_opt.mp_fail = 0;
> > +     }
> > +
> >       if (mp_opt.reset) {
> >               subflow->reset_seen = 1;
> >               subflow->reset_reason = mp_opt.reset_reason;
> > @@ -1329,6 +1374,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
> >                                     opts->backup, TCPOPT_NOP);
> >       }
> >
> > +     if (OPTION_MPTCP_FAIL & opts->suboptions) {
> > +             const struct sock *ssk = (const struct sock *)tp;
> > +             struct mptcp_subflow_context *subflow;
> > +
> > +             subflow = mptcp_subflow_ctx(ssk);
> > +             subflow->send_mp_fail = 0;
> > +
> > +             *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
> > +                                   TCPOLEN_MPTCP_FAIL,
> > +                                   0, 0);
> > +             put_unaligned_be64(opts->fail_seq, ptr);
> > +             ptr += 2;
> > +     }
> > +
> >       if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
> >               *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
> >                                     TCPOLEN_MPTCP_MPJ_SYN,
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index 24b4e1f6d23f..ab8f92c49029 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -27,6 +27,7 @@
> > #define OPTION_MPTCP_FASTCLOSE        BIT(9)
> > #define OPTION_MPTCP_PRIO     BIT(10)
> > #define OPTION_MPTCP_RST      BIT(11)
> > +#define OPTION_MPTCP_FAIL    BIT(12)
> >
> > /* MPTCP option subtypes */
> > #define MPTCPOPT_MP_CAPABLE   0
> > @@ -68,6 +69,7 @@
> > #define TCPOLEN_MPTCP_PRIO_ALIGN      4
> > #define TCPOLEN_MPTCP_FASTCLOSE               12
> > #define TCPOLEN_MPTCP_RST             4
> > +#define TCPOLEN_MPTCP_FAIL           12
> >
> > /* MPTCP MP_JOIN flags */
> > #define MPTCPOPT_BACKUP               BIT(0)
> > @@ -135,6 +137,7 @@ struct mptcp_options_received {
> >               add_addr : 1,
> >               rm_addr : 1,
> >               mp_prio : 1,
> > +             mp_fail : 1,
> >               family : 4,
> >               echo : 1,
> >               backup : 1;
> > @@ -162,6 +165,7 @@ struct mptcp_options_received {
> >       u16     port;
> >       u8      reset_reason:4;
> >       u8      reset_transient:1;
> > +     u64     fail_seq;
> > };
> >
> > static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
> > @@ -428,6 +432,7 @@ struct mptcp_subflow_context {
> >               mpc_map : 1,
> >               backup : 1,
> >               send_mp_prio : 1,
> > +             send_mp_fail : 1,
> >               rx_eof : 1,
> >               can_ack : 1,        /* only after processing the remote a key */
> >               disposable : 1;     /* ctx can be free at ulp release time */
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index b597811a2f8d..059c1a0ef25b 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -823,12 +823,16 @@ static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb)
> >
> >               if (csum_fold(csum)) {
> >                       pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum));
> > +                     subflow->send_mp_fail = 1;
> > +                     subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
>
> This is sending a MP_FAIL not MP_TCPRST, so a reason isn't used.
>
> > +                     tcp_send_active_reset(ssk, GFP_ATOMIC);
>
> The subflow socket must be closed too. Does it work to call
> mptcp_subflow_reset() instead?
>
> I don't think there should be data cleanup to do because the data can't be
> pushed up to the msk until the entire mapping is received.
>
>
> Mat
>
> >                       return true; //false;
> >               }
> >               pr_debug("%s DSS checksum done", __func__);
> >       }
> >
> > out:
> > +     subflow->send_mp_fail = 0;
> >       return true;
> > }
> >
> > --
> > 2.29.2
>
> --
> Mat Martineau
> Intel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-06-21 11:53 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-10 12:39 [MPTCP] [MPTCP][RFC PATCH 0/2] DSS checksum and MP_FAIL support Geliang Tang
2021-03-10 12:39 ` [MPTCP] [MPTCP][RFC PATCH 1/2] mptcp: DSS checksum support Geliang Tang
2021-03-10 12:39   ` [MPTCP] [MPTCP][RFC PATCH 2/2] mptcp: add MP_FAIL support Geliang Tang
     [not found]     ` <974f96db-7160-793e-6d9a-ca919cb74a8@linux.intel.com>
2021-06-21 11:53       ` Geliang Tang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).