* [MPTCP] [MPTCP][RFC PATCH 0/2] DSS checksum and MP_FAIL support
@ 2021-03-10 12:39 Geliang Tang
2021-03-10 12:39 ` [MPTCP] [MPTCP][RFC PATCH 1/2] mptcp: DSS checksum support Geliang Tang
0 siblings, 1 reply; 4+ messages in thread
From: Geliang Tang @ 2021-03-10 12:39 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 744 bytes --]
This is the first RFC version of the DSS checksum and MP_FAIL support,
addressing issues #134 and #52. It is not finished yet. Patch 1 still has a
bug in it: the selftest script mptcp_join.sh passes, but mptcp_connect.sh
sometimes fails. Patch 2 does not yet implement the logic for receiving
MP_FAIL; I have marked that spot with a TODO label.
I would appreciate suggestions for improving this patchset.
Thanks.
Geliang Tang (2):
mptcp: DSS checksum support
mptcp: add MP_FAIL support
include/net/mptcp.h | 2 +
net/mptcp/options.c | 122 +++++++++++++++++++++++++++++++++++++++----
net/mptcp/protocol.h | 13 +++++
net/mptcp/subflow.c | 42 ++++++++++++++-
4 files changed, 169 insertions(+), 10 deletions(-)
--
2.29.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [MPTCP] [MPTCP][RFC PATCH 1/2] mptcp: DSS checksum support
@ 2021-03-10 12:39 ` Geliang Tang
2021-03-10 12:39 ` [MPTCP] [MPTCP][RFC PATCH 2/2] mptcp: add MP_FAIL support Geliang Tang
0 siblings, 1 reply; 4+ messages in thread
From: Geliang Tang @ 2021-03-10 12:39 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 6995 bytes --]
Add DSS checksum support.
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/134
Signed-off-by: Geliang Tang <geliangtang(a)gmail.com>
---
include/net/mptcp.h | 1 +
net/mptcp/options.c | 61 ++++++++++++++++++++++++++++++++++++++------
net/mptcp/protocol.h | 8 ++++++
net/mptcp/subflow.c | 38 ++++++++++++++++++++++++++-
4 files changed, 99 insertions(+), 9 deletions(-)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 16fe34d139c3..de88f38e60b1 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -32,6 +32,7 @@ struct mptcp_ext {
frozen:1,
reset_transient:1;
u8 reset_reason:4;
+ u16 csum;
};
#define MPTCP_RM_IDS_MAX 8
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index bf1b8497e091..9df26291cf9a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -69,11 +69,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* "If a checksum is not present when its use has been
* negotiated, the receiver MUST close the subflow with a RST as
* it is considered broken."
- *
- * We don't implement DSS checksum - fall back to TCP.
*/
if (flags & MPTCP_CAP_CHECKSUM_REQD)
- break;
+ ;
mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
@@ -208,9 +206,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
+ if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+ mp_opt->csum = get_unaligned_be16(ptr);
+ ptr += 2;
+ }
+
+ pr_debug("%s data_seq=%llu subflow_seq=%u data_len=%u csum=%u", __func__,
mp_opt->data_seq, mp_opt->subflow_seq,
- mp_opt->data_len);
+ mp_opt->data_len, mp_opt->csum);
}
break;
@@ -340,6 +343,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->dss = 0;
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
+ mp_opt->csum = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -520,6 +524,34 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
}
}
+static u16 mptcp_generate_dss_csum(struct sk_buff *skb)
+{
+ struct mptcp_ext *mpext;
+
+ if (!skb)
+ return 0;
+
+ mpext = mptcp_get_ext(skb);
+ if (mpext && mpext->use_map) {
+ struct csum_pseudo_header header;
+ __wsum csum;
+
+ header.data_seq = mpext->data_seq;
+ header.subflow_seq = mpext->subflow_seq;
+ header.data_len = mpext->data_len;
+ header.csum = 0;
+
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ csum = csum_partial(&header, sizeof(header), csum);
+
+ pr_debug("%s data_seq=%llu subflow_seq=%u data_len=%u csum=%u\n",
+ __func__, header.data_seq, header.subflow_seq, header.data_len, csum_fold(csum));
+ return csum_fold(csum);
+ }
+
+ return 0;
+}
+
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool snd_data_fin_enable,
unsigned int *size,
@@ -543,8 +575,10 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
remaining -= map_size;
dss_size = map_size;
- if (mpext)
+ if (mpext) {
+ mpext->csum = mptcp_generate_dss_csum(skb);
opts->ext_copy = *mpext;
+ }
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
@@ -1141,6 +1175,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
+
+ if (!subflow->mpc_map)
+ mpext->csum = mp_opt.csum;
}
}
@@ -1349,6 +1386,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN;
+
+ if (mpext->csum)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
@@ -1368,8 +1408,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
- put_unaligned_be32(mpext->data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (mpext->csum) {
+ put_unaligned_be32(mpext->data_len << 16 |
+ mpext->csum, ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
}
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f9dcf49ffe33..24b4e1f6d23f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -126,6 +126,7 @@ struct mptcp_options_received {
u64 data_seq;
u32 subflow_seq;
u16 data_len;
+ u16 csum;
u16 mp_capable : 1,
mp_join : 1,
fastclose : 1,
@@ -356,6 +357,13 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
+struct csum_pseudo_header {
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ u16 csum;
+};
+
struct mptcp_subflow_request_sock {
struct tcp_request_sock sk;
u16 mp_capable : 1,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bedbae99df2c..b597811a2f8d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -796,6 +796,42 @@ static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
mptcp_subflow_get_map_offset(subflow);
}
+static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct csum_pseudo_header header;
+ struct mptcp_ext *mpext;
+ __wsum csum;
+
+ if (subflow->mpc_map)
+ goto out;
+ if (!skb)
+ goto out;
+
+ mpext = mptcp_get_ext(skb);
+ if (mpext && mpext->use_map && mpext->csum) {
+ header.data_seq = subflow->map_seq;
+ header.subflow_seq = subflow->map_subflow_seq;
+ header.data_len = subflow->map_data_len;
+ header.csum = mpext->csum;
+
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ csum = csum_partial(&header, sizeof(header), csum);
+
+ pr_debug("%s data_seq=%llu subflow_seq=%u data_len=%u csum=%u",
+ __func__, header.data_seq, header.subflow_seq, header.data_len, header.csum);
+
+ if (csum_fold(csum)) {
+ pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum));
+ return true; //false;
+ }
+ pr_debug("%s DSS checksum done", __func__);
+ }
+
+out:
+ return true;
+}
+
static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -814,7 +850,7 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
warn_bad_map(subflow, ssn + skb->len);
return false;
}
- return true;
+ return validate_dss_csum(ssk, skb);
}
static enum mapping_status get_mapping_status(struct sock *ssk,
--
2.29.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [MPTCP] [MPTCP][RFC PATCH 2/2] mptcp: add MP_FAIL support
@ 2021-03-10 12:39 ` Geliang Tang
[not found] ` <974f96db-7160-793e-6d9a-ca919cb74a8@linux.intel.com>
0 siblings, 1 reply; 4+ messages in thread
From: Geliang Tang @ 2021-03-10 12:39 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 5909 bytes --]
Add handling for sending and receiving MP_FAIL suboption.
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52
Signed-off-by: Geliang Tang <geliangtang(a)gmail.com>
---
include/net/mptcp.h | 1 +
net/mptcp/options.c | 61 +++++++++++++++++++++++++++++++++++++++++++-
net/mptcp/protocol.h | 5 ++++
net/mptcp/subflow.c | 4 +++
4 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index de88f38e60b1..6635689ce03f 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -64,6 +64,7 @@ struct mptcp_out_options {
u32 nonce;
u64 thmac;
u32 token;
+ u64 fail_seq;
u8 hmac[20];
struct mptcp_ext ext_copy;
#endif
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9df26291cf9a..1b5aaab80ba0 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -318,6 +318,17 @@ static void mptcp_parse_option(const struct sk_buff *skb,
flags = *ptr++;
mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
mp_opt->reset_reason = *ptr;
+ pr_debug("RST: reset_reason=%u", mp_opt->reset_reason);
+ break;
+
+ case MPTCPOPT_MP_FAIL:
+ if (opsize != TCPOLEN_MPTCP_FAIL)
+ break;
+
+ ptr += 2;
+ mp_opt->mp_fail = 1;
+ mp_opt->fail_seq = get_unaligned_be64(ptr);
+ pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
break;
default:
@@ -344,6 +355,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
mp_opt->csum = 0;
+ mp_opt->mp_fail = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -787,6 +799,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
opts->reset_reason = subflow->reset_reason;
}
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ if (!subflow->send_mp_fail)
+ return false;
+
+ if (remaining < TCPOLEN_MPTCP_FAIL)
+ return false;
+
+ *size = TCPOLEN_MPTCP_FAIL;
+ opts->suboptions |= OPTION_MPTCP_FAIL;
+ opts->fail_seq = subflow->map_seq;
+
+ pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+ return true;
+}
+
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts)
@@ -803,7 +837,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
- mptcp_established_options_rst(sk, skb, size, remaining, opts);
+ if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ }
+ mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
+ *size += opt_size;
+ remaining -= opt_size;
return true;
}
@@ -1120,6 +1160,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mp_opt.mp_prio = 0;
}
+ if (mp_opt.mp_fail) {
+ /* TODO */
+ mp_opt.mp_fail = 0;
+ }
+
if (mp_opt.reset) {
subflow->reset_seen = 1;
subflow->reset_reason = mp_opt.reset_reason;
@@ -1329,6 +1374,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
opts->backup, TCPOPT_NOP);
}
+ if (OPTION_MPTCP_FAIL & opts->suboptions) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_fail = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+ TCPOLEN_MPTCP_FAIL,
+ 0, 0);
+ put_unaligned_be64(opts->fail_seq, ptr);
+ ptr += 2;
+ }
+
if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYN,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 24b4e1f6d23f..ab8f92c49029 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -27,6 +27,7 @@
#define OPTION_MPTCP_FASTCLOSE BIT(9)
#define OPTION_MPTCP_PRIO BIT(10)
#define OPTION_MPTCP_RST BIT(11)
+#define OPTION_MPTCP_FAIL BIT(12)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -68,6 +69,7 @@
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_FAIL 12
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
@@ -135,6 +137,7 @@ struct mptcp_options_received {
add_addr : 1,
rm_addr : 1,
mp_prio : 1,
+ mp_fail : 1,
family : 4,
echo : 1,
backup : 1;
@@ -162,6 +165,7 @@ struct mptcp_options_received {
u16 port;
u8 reset_reason:4;
u8 reset_transient:1;
+ u64 fail_seq;
};
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -428,6 +432,7 @@ struct mptcp_subflow_context {
mpc_map : 1,
backup : 1,
send_mp_prio : 1,
+ send_mp_fail : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1; /* ctx can be free at ulp release time */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b597811a2f8d..059c1a0ef25b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -823,12 +823,16 @@ static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb)
if (csum_fold(csum)) {
pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum));
+ subflow->send_mp_fail = 1;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
return true; //false;
}
pr_debug("%s DSS checksum done", __func__);
}
out:
+ subflow->send_mp_fail = 0;
return true;
}
--
2.29.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [MPTCP] [MPTCP][RFC PATCH 2/2] mptcp: add MP_FAIL support
[not found] ` <974f96db-7160-793e-6d9a-ca919cb74a8@linux.intel.com>
@ 2021-06-21 11:53 ` Geliang Tang
0 siblings, 0 replies; 4+ messages in thread
From: Geliang Tang @ 2021-06-21 11:53 UTC (permalink / raw)
To: Mat Martineau; +Cc: Christoph Paasch, mptcp
Hi Mat,
Mat Martineau <mathew.j.martineau@linux.intel.com> 于2021年3月20日周六 上午9:35写道:
>
> On Wed, 10 Mar 2021, Geliang Tang wrote:
>
> > Add handling for sending and receiving MP_FAIL suboption.
> >
> > Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/52
> >
> > Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> > ---
> > include/net/mptcp.h | 1 +
> > net/mptcp/options.c | 61 +++++++++++++++++++++++++++++++++++++++++++-
> > net/mptcp/protocol.h | 5 ++++
> > net/mptcp/subflow.c | 4 +++
> > 4 files changed, 70 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > index de88f38e60b1..6635689ce03f 100644
> > --- a/include/net/mptcp.h
> > +++ b/include/net/mptcp.h
> > @@ -64,6 +64,7 @@ struct mptcp_out_options {
> > u32 nonce;
> > u64 thmac;
> > u32 token;
> > + u64 fail_seq;
> > u8 hmac[20];
> > struct mptcp_ext ext_copy;
> > #endif
> > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > index 9df26291cf9a..1b5aaab80ba0 100644
> > --- a/net/mptcp/options.c
> > +++ b/net/mptcp/options.c
> > @@ -318,6 +318,17 @@ static void mptcp_parse_option(const struct sk_buff *skb,
> > flags = *ptr++;
> > mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
> > mp_opt->reset_reason = *ptr;
> > + pr_debug("RST: reset_reason=%u", mp_opt->reset_reason);
> > + break;
> > +
> > + case MPTCPOPT_MP_FAIL:
> > + if (opsize != TCPOLEN_MPTCP_FAIL)
> > + break;
> > +
> > + ptr += 2;
> > + mp_opt->mp_fail = 1;
> > + mp_opt->fail_seq = get_unaligned_be64(ptr);
> > + pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
> > break;
> >
> > default:
> > @@ -344,6 +355,7 @@ void mptcp_get_options(const struct sk_buff *skb,
> > mp_opt->mp_prio = 0;
> > mp_opt->reset = 0;
> > mp_opt->csum = 0;
> > + mp_opt->mp_fail = 0;
> >
> > length = (th->doff * 4) - sizeof(struct tcphdr);
> > ptr = (const unsigned char *)(th + 1);
> > @@ -787,6 +799,28 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
> > opts->reset_reason = subflow->reset_reason;
> > }
> >
> > +static bool mptcp_established_options_mp_fail(struct sock *sk,
> > + unsigned int *size,
> > + unsigned int remaining,
> > + struct mptcp_out_options *opts)
> > +{
> > + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> > +
> > + if (!subflow->send_mp_fail)
> > + return false;
> > +
> > + if (remaining < TCPOLEN_MPTCP_FAIL)
> > + return false;
> > +
> > + *size = TCPOLEN_MPTCP_FAIL;
> > + opts->suboptions |= OPTION_MPTCP_FAIL;
> > + opts->fail_seq = subflow->map_seq;
> > +
> > + pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
> > +
> > + return true;
> > +}
> > +
> > bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> > unsigned int *size, unsigned int remaining,
> > struct mptcp_out_options *opts)
> > @@ -803,7 +837,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> > return false;
> >
> > if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
> > - mptcp_established_options_rst(sk, skb, size, remaining, opts);
> > + if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
> > + *size += opt_size;
> > + remaining -= opt_size;
> > + }
> > + mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts);
> > + *size += opt_size;
> > + remaining -= opt_size;
> > return true;
> > }
> >
> > @@ -1120,6 +1160,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
> > mp_opt.mp_prio = 0;
> > }
> >
> > + if (mp_opt.mp_fail) {
> > + /* TODO */
>
> It's not clear to me what RFC 8684 requires here.
>
> If Peer A detects the bad checksum, it sends MP_FAIL with fail_seq
> matching subflow->map_seq for the mapping being received (which the code
> above does).
>
> The RFC says the recipient of the option (Peer B) must discard data after
> fail_seq - but it's the transmitter of the data? It's very confusingly
> worded. I looked at the multipath-tcp.org kernel and the MP_FAIL sequence
> number appears to be ignored by the receiver.
After comparing RFC 8684 with RFC 6824, I think I have found the answer to
this question: it is peer A, not peer B, that must discard the data.
When multiple subflows are in use and peer A detects a bad checksum, it
sends MP_FAIL+RST to close that subflow, and it discards the data after
fail_seq.
When peer B then receives this MP_FAIL+RST, it just ignores the MP_FAIL
sequence number and does nothing. The fail_seq is only used in the
single-subflow case, to send the infinite mapping. In the multiple-subflow
case, the fail_seq is simply ignored.
I think the MP_FAIL logic should be like this:
* Peer B sends a DSS to peer A, the data is modified by a middlebox on the
way, and peer A then detects the bad checksum.
* In the multiple-subflow case, peer A sends MP_FAIL+RST back to peer B,
and peer A discards the data following the bad data sequence number. Peer
B receives this MP_FAIL+RST and closes the subflow.
* In the single-subflow case, using the simple implementation, peer A
sends MP_FAIL back to peer B, and peer A falls back to regular TCP. Peer
B receives this MP_FAIL and also falls back to regular TCP.
Have I understood this correctly?
-Geliang
>
> So, maybe this is the right thing to do:
>
> * When checksums are enabled, do not send DATA_ACK for received data
> until the full mapping is received and the checksum is verified. This
> allows retransmission of any data that's part of the bad checksum.
>
> * When a bad checksum is detected, send the MP_FAIL and watch for the
> MP_FAIL reply. I'm not sure of the conditions for resending MP_FAIL.
>
> * When MP_FAIL is received, send MP_FAIL+RST for the mapping currently
> being received on that subflow and fallback if the in-order-single-subflow
> conditions in the RFC are met.
>
> Christoph, is that on the right track? (If so, is it time for an errata
> submission? :) )
>
>
> - Mat
>
>
>
> > + mp_opt.mp_fail = 0;
> > + }
> > +
> > if (mp_opt.reset) {
> > subflow->reset_seen = 1;
> > subflow->reset_reason = mp_opt.reset_reason;
> > @@ -1329,6 +1374,20 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
> > opts->backup, TCPOPT_NOP);
> > }
> >
> > + if (OPTION_MPTCP_FAIL & opts->suboptions) {
> > + const struct sock *ssk = (const struct sock *)tp;
> > + struct mptcp_subflow_context *subflow;
> > +
> > + subflow = mptcp_subflow_ctx(ssk);
> > + subflow->send_mp_fail = 0;
> > +
> > + *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
> > + TCPOLEN_MPTCP_FAIL,
> > + 0, 0);
> > + put_unaligned_be64(opts->fail_seq, ptr);
> > + ptr += 2;
> > + }
> > +
> > if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
> > *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
> > TCPOLEN_MPTCP_MPJ_SYN,
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index 24b4e1f6d23f..ab8f92c49029 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -27,6 +27,7 @@
> > #define OPTION_MPTCP_FASTCLOSE BIT(9)
> > #define OPTION_MPTCP_PRIO BIT(10)
> > #define OPTION_MPTCP_RST BIT(11)
> > +#define OPTION_MPTCP_FAIL BIT(12)
> >
> > /* MPTCP option subtypes */
> > #define MPTCPOPT_MP_CAPABLE 0
> > @@ -68,6 +69,7 @@
> > #define TCPOLEN_MPTCP_PRIO_ALIGN 4
> > #define TCPOLEN_MPTCP_FASTCLOSE 12
> > #define TCPOLEN_MPTCP_RST 4
> > +#define TCPOLEN_MPTCP_FAIL 12
> >
> > /* MPTCP MP_JOIN flags */
> > #define MPTCPOPT_BACKUP BIT(0)
> > @@ -135,6 +137,7 @@ struct mptcp_options_received {
> > add_addr : 1,
> > rm_addr : 1,
> > mp_prio : 1,
> > + mp_fail : 1,
> > family : 4,
> > echo : 1,
> > backup : 1;
> > @@ -162,6 +165,7 @@ struct mptcp_options_received {
> > u16 port;
> > u8 reset_reason:4;
> > u8 reset_transient:1;
> > + u64 fail_seq;
> > };
> >
> > static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
> > @@ -428,6 +432,7 @@ struct mptcp_subflow_context {
> > mpc_map : 1,
> > backup : 1,
> > send_mp_prio : 1,
> > + send_mp_fail : 1,
> > rx_eof : 1,
> > can_ack : 1, /* only after processing the remote a key */
> > disposable : 1; /* ctx can be free at ulp release time */
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index b597811a2f8d..059c1a0ef25b 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -823,12 +823,16 @@ static bool validate_dss_csum(struct sock *ssk, struct sk_buff *skb)
> >
> > if (csum_fold(csum)) {
> > pr_err("%s DSS checksum error csum=%u!", __func__, csum_fold(csum));
> > + subflow->send_mp_fail = 1;
> > + subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
>
> This is sending a MP_FAIL not MP_TCPRST, so a reason isn't used.
>
> > + tcp_send_active_reset(ssk, GFP_ATOMIC);
>
> The subflow socket must be closed too. Does it work to call
> mptcp_subflow_reset() instead?
>
> I don't think there should be data cleanup to do because the data can't be
> pushed up to the msk until the entire mapping is received.
>
>
> Mat
>
> > return true; //false;
> > }
> > pr_debug("%s DSS checksum done", __func__);
> > }
> >
> > out:
> > + subflow->send_mp_fail = 0;
> > return true;
> > }
> >
> > --
> > 2.29.2
>
> --
> Mat Martineau
> Intel
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-06-21 11:53 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-10 12:39 [MPTCP] [MPTCP][RFC PATCH 0/2] DSS checksum and MP_FAIL support Geliang Tang
2021-03-10 12:39 ` [MPTCP] [MPTCP][RFC PATCH 1/2] mptcp: DSS checksum support Geliang Tang
2021-03-10 12:39 ` [MPTCP] [MPTCP][RFC PATCH 2/2] mptcp: add MP_FAIL support Geliang Tang
[not found] ` <974f96db-7160-793e-6d9a-ca919cb74a8@linux.intel.com>
2021-06-21 11:53 ` Geliang Tang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).