* [MPTCP] Re: [PATCH mptcp-next 04/12] mptcp: Track received DATA_FIN sequence number and add related helpers
@ 2020-07-17 18:28 Mat Martineau
0 siblings, 0 replies; 2+ messages in thread
From: Mat Martineau @ 2020-07-17 18:28 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 6991 bytes --]
On Thu, 16 Jul 2020, Paolo Abeni wrote:
> On Wed, 2020-07-15 at 17:20 -0700, Mat Martineau wrote:
>> Incoming DATA_FIN headers need to propagate the presence of the DATA_FIN
>> bit and the associated sequence number to the MPTCP layer, even when
>> arriving on a bare ACK that does not get added to the receive queue. Add
>> structure members to store the DATA_FIN information and helpers to set
>> and check those values.
>>
>> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
>> ---
>> net/mptcp/options.c | 16 +++++++
>> net/mptcp/protocol.c | 100 ++++++++++++++++++++++++++++++++++++++-----
>> net/mptcp/protocol.h | 3 ++
>> 3 files changed, 109 insertions(+), 10 deletions(-)
>>
>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
>> index 756a636d3d3a..db34306eb73f 100644
>> --- a/net/mptcp/options.c
>> +++ b/net/mptcp/options.c
>> @@ -775,6 +775,22 @@ static void update_una(struct mptcp_sock *msk,
>> }
>> }
>>
>> +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
>> +{
>> + /* Skip if DATA_FIN was already received.
>> + * If updating simultaneously with the recvmsg loop, values
>> + * should match. If they mismatch, the peer is misbehaving and
>> + * we will prefer the most recent information.
>> + */
>> + if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
>> + return false;
>> +
>> + WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq);
>> + WRITE_ONCE(msk->rcv_data_fin, 1);
>> +
>> + return true;
>> +}
>> +
>> static bool add_addr_hmac_valid(struct mptcp_sock *msk,
>> struct mptcp_options_received *mp_opt)
>> {
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index 69100187490f..43e48327902e 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -16,6 +16,7 @@
>> #include <net/inet_hashtables.h>
>> #include <net/protocol.h>
>> #include <net/tcp.h>
>> +#include <net/tcp_states.h>
>> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>> #include <net/transp_v6.h>
>> #endif
>> @@ -163,6 +164,95 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
>> return mptcp_subflow_data_available(ssk);
>> }
>>
>> +static bool mptcp_pending_data_fin(struct sock *sk)
>> +{
>> + struct mptcp_sock *msk = mptcp_sk(sk);
>> +
>> + return (READ_ONCE(msk->rcv_data_fin) &&
>> + ((1 << sk->sk_state) &
>> + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)) &&
>> + msk->ack_seq == READ_ONCE(msk->rcv_data_fin_seq));
>> +}
>> +
>> +static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
>> +{
>> + long tout = ssk && inet_csk(ssk)->icsk_pending ?
>> + inet_csk(ssk)->icsk_timeout - jiffies : 0;
>> +
>> + if (tout <= 0)
>> + tout = mptcp_sk(sk)->timer_ival;
>> + mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
>> +}
>> +
>> +static void mptcp_check_data_fin(struct sock *sk)
>> +{
>> + struct mptcp_sock *msk = mptcp_sk(sk);
>> +
>> + if (__mptcp_check_fallback(msk) || !msk->first)
>> + return;
>> +
>> + /* Need to ack a DATA_FIN received from a peer while this side
>> + * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
>> + * msk->rcv_data_fin was set when parsing the incoming options
>> + * at the subflow level and the msk lock was not held, so this
>> + * is the first opportunity to act on the DATA_FIN and change
>> + * the msk state.
>> + *
>> + * If we are caught up to the sequence number of the incoming
>> + * DATA_FIN, send the DATA_ACK now and do state transition. If
>> + * not caught up, do nothing and let the recv code send DATA_ACK
>> + * when catching up.
>> + */
>> +
>> + if (READ_ONCE(msk->rcv_data_fin) &&
>> + ((1 << sk->sk_state) &
>> + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
>> + u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
>> + struct mptcp_subflow_context *subflow;
>> +
>> + if (msk->ack_seq + 1 == rcv_data_fin_seq) {
>
> This is quite alike mptcp_pending_data_fin(), so perhaps the helper
> could be reused here? see also the comment on patch 8/12.
>
Right, I'll fix that.
>> + msk->ack_seq++;
>> + WRITE_ONCE(msk->rcv_data_fin, 0);
>> +
>> + sk->sk_shutdown |= RCV_SHUTDOWN;
>> +
>> + switch (sk->sk_state) {
>> + case TCP_ESTABLISHED:
>> + inet_sk_state_store(sk, TCP_CLOSE_WAIT);
>> + break;
>> + case TCP_FIN_WAIT1:
>> + inet_sk_state_store(sk, TCP_CLOSING);
>> + break;
>> + case TCP_FIN_WAIT2:
>> + inet_sk_state_store(sk, TCP_CLOSE);
>> + // @@ Close subflows now?
>> + break;
>> + default:
>> + /* Other states not expected */
>> + WARN_ON_ONCE(1);
>> + break;
>
> it's a pity we can't use mptcp_close_state() here.
Yeah, this spot is more like tcp_fin(), which has different transitions
from tcp_close_state(). Taking a second look at that, the WARN_ON_ONCE(1)
doesn't seem right, since there may be retransmissions of DATA_FIN in a
few other states.
>
>> + }
>> +
>> + mptcp_set_timeout(sk, NULL);
>> + mptcp_for_each_subflow(msk, subflow) {
>> + struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
>> +
>> + lock_sock(ssk);
>> + tcp_send_ack(ssk);
>> + release_sock(ssk);
>> + }
>> +
>> + sk->sk_state_change(sk);
>> +
>> + if (sk->sk_shutdown == SHUTDOWN_MASK ||
>> + sk->sk_state == TCP_CLOSE)
>> + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
>> + else
>> + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
>> + }
>> + }
>> +}
>> +
>> static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
>> struct sock *ssk,
>> unsigned int *bytes)
>> @@ -303,16 +393,6 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk)
>> spin_unlock_bh(&msk->join_list_lock);
>> }
>>
>> -static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
>> -{
>> - long tout = ssk && inet_csk(ssk)->icsk_pending ?
>> - inet_csk(ssk)->icsk_timeout - jiffies : 0;
>> -
>> - if (tout <= 0)
>> - tout = mptcp_sk(sk)->timer_ival;
>> - mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
>> -}
>> -
>> static bool mptcp_timer_pending(struct sock *sk)
>> {
>> return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index 08198b61b8a6..813c9d5c59f2 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -193,11 +193,13 @@ struct mptcp_sock {
>> u64 remote_key;
>> u64 write_seq;
>> u64 ack_seq;
>> + u64 rcv_data_fin_seq;
>> atomic64_t snd_una;
>> unsigned long timer_ival;
>> u32 token;
>> unsigned long flags;
>> bool can_ack;
>> + bool rcv_data_fin;
>> spinlock_t join_list_lock;
>> struct work_struct work;
>> struct list_head conn_list;
>
> Note: we should revisit mptcp_sock binary layout someday ;)
Agreed!
--
Mat Martineau
Intel
^ permalink raw reply [flat|nested] 2+ messages in thread
* [MPTCP] Re: [PATCH mptcp-next 04/12] mptcp: Track received DATA_FIN sequence number and add related helpers
@ 2020-07-16 13:33 Paolo Abeni
0 siblings, 0 replies; 2+ messages in thread
From: Paolo Abeni @ 2020-07-16 13:33 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 6455 bytes --]
On Wed, 2020-07-15 at 17:20 -0700, Mat Martineau wrote:
> Incoming DATA_FIN headers need to propagate the presence of the DATA_FIN
> bit and the associated sequence number to the MPTCP layer, even when
> arriving on a bare ACK that does not get added to the receive queue. Add
> structure members to store the DATA_FIN information and helpers to set
> and check those values.
>
> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
> ---
> net/mptcp/options.c | 16 +++++++
> net/mptcp/protocol.c | 100 ++++++++++++++++++++++++++++++++++++++-----
> net/mptcp/protocol.h | 3 ++
> 3 files changed, 109 insertions(+), 10 deletions(-)
>
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index 756a636d3d3a..db34306eb73f 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -775,6 +775,22 @@ static void update_una(struct mptcp_sock *msk,
> }
> }
>
> +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
> +{
> + /* Skip if DATA_FIN was already received.
> + * If updating simultaneously with the recvmsg loop, values
> + * should match. If they mismatch, the peer is misbehaving and
> + * we will prefer the most recent information.
> + */
> + if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
> + return false;
> +
> + WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq);
> + WRITE_ONCE(msk->rcv_data_fin, 1);
> +
> + return true;
> +}
> +
> static bool add_addr_hmac_valid(struct mptcp_sock *msk,
> struct mptcp_options_received *mp_opt)
> {
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 69100187490f..43e48327902e 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -16,6 +16,7 @@
> #include <net/inet_hashtables.h>
> #include <net/protocol.h>
> #include <net/tcp.h>
> +#include <net/tcp_states.h>
> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> #include <net/transp_v6.h>
> #endif
> @@ -163,6 +164,95 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
> return mptcp_subflow_data_available(ssk);
> }
>
> +static bool mptcp_pending_data_fin(struct sock *sk)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> +
> + return (READ_ONCE(msk->rcv_data_fin) &&
> + ((1 << sk->sk_state) &
> + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)) &&
> + msk->ack_seq == READ_ONCE(msk->rcv_data_fin_seq));
> +}
> +
> +static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
> +{
> + long tout = ssk && inet_csk(ssk)->icsk_pending ?
> + inet_csk(ssk)->icsk_timeout - jiffies : 0;
> +
> + if (tout <= 0)
> + tout = mptcp_sk(sk)->timer_ival;
> + mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
> +}
> +
> +static void mptcp_check_data_fin(struct sock *sk)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> +
> + if (__mptcp_check_fallback(msk) || !msk->first)
> + return;
> +
> + /* Need to ack a DATA_FIN received from a peer while this side
> + * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
> + * msk->rcv_data_fin was set when parsing the incoming options
> + * at the subflow level and the msk lock was not held, so this
> + * is the first opportunity to act on the DATA_FIN and change
> + * the msk state.
> + *
> + * If we are caught up to the sequence number of the incoming
> + * DATA_FIN, send the DATA_ACK now and do state transition. If
> + * not caught up, do nothing and let the recv code send DATA_ACK
> + * when catching up.
> + */
> +
> + if (READ_ONCE(msk->rcv_data_fin) &&
> + ((1 << sk->sk_state) &
> + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
> + u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
> + struct mptcp_subflow_context *subflow;
> +
> + if (msk->ack_seq + 1 == rcv_data_fin_seq) {
This is quite alike mptcp_pending_data_fin(), so perhaps the helper
could be reused here? see also the comment on patch 8/12.
> + msk->ack_seq++;
> + WRITE_ONCE(msk->rcv_data_fin, 0);
> +
> + sk->sk_shutdown |= RCV_SHUTDOWN;
> +
> + switch (sk->sk_state) {
> + case TCP_ESTABLISHED:
> + inet_sk_state_store(sk, TCP_CLOSE_WAIT);
> + break;
> + case TCP_FIN_WAIT1:
> + inet_sk_state_store(sk, TCP_CLOSING);
> + break;
> + case TCP_FIN_WAIT2:
> + inet_sk_state_store(sk, TCP_CLOSE);
> + // @@ Close subflows now?
> + break;
> + default:
> + /* Other states not expected */
> + WARN_ON_ONCE(1);
> + break;
it's a pity we can't use mptcp_close_state() here.
> + }
> +
> + mptcp_set_timeout(sk, NULL);
> + mptcp_for_each_subflow(msk, subflow) {
> + struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> +
> + lock_sock(ssk);
> + tcp_send_ack(ssk);
> + release_sock(ssk);
> + }
> +
> + sk->sk_state_change(sk);
> +
> + if (sk->sk_shutdown == SHUTDOWN_MASK ||
> + sk->sk_state == TCP_CLOSE)
> + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
> + else
> + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
> + }
> + }
> +}
> +
> static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
> struct sock *ssk,
> unsigned int *bytes)
> @@ -303,16 +393,6 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk)
> spin_unlock_bh(&msk->join_list_lock);
> }
>
> -static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
> -{
> - long tout = ssk && inet_csk(ssk)->icsk_pending ?
> - inet_csk(ssk)->icsk_timeout - jiffies : 0;
> -
> - if (tout <= 0)
> - tout = mptcp_sk(sk)->timer_ival;
> - mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
> -}
> -
> static bool mptcp_timer_pending(struct sock *sk)
> {
> return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 08198b61b8a6..813c9d5c59f2 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -193,11 +193,13 @@ struct mptcp_sock {
> u64 remote_key;
> u64 write_seq;
> u64 ack_seq;
> + u64 rcv_data_fin_seq;
> atomic64_t snd_una;
> unsigned long timer_ival;
> u32 token;
> unsigned long flags;
> bool can_ack;
> + bool rcv_data_fin;
> spinlock_t join_list_lock;
> struct work_struct work;
> struct list_head conn_list;
Note: we should revisit mptcp_sock binary layout someday ;)
/P
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-07-17 18:28 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-17 18:28 [MPTCP] Re: [PATCH mptcp-next 04/12] mptcp: Track received DATA_FIN sequence number and add related helpers Mat Martineau
-- strict thread matches above, loose matches on Subject: below --
2020-07-16 13:33 Paolo Abeni
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.