From: Mat Martineau <mathew.j.martineau at linux.intel.com>
To: mptcp at lists.01.org
Subject: [MPTCP] Re: [PATCH mptcp-next 04/12] mptcp: Track received DATA_FIN sequence number and add related helpers
Date: Fri, 17 Jul 2020 11:28:53 -0700 [thread overview]
Message-ID: <alpine.OSX.2.23.453.2007171107510.39222@kdpetti-mobl.amr.corp.intel.com> (raw)
In-Reply-To: 49bf8d332e40fbe31741f4b75052a7eb277b0195.camel@redhat.com
[-- Attachment #1: Type: text/plain, Size: 6991 bytes --]
On Thu, 16 Jul 2020, Paolo Abeni wrote:
> On Wed, 2020-07-15 at 17:20 -0700, Mat Martineau wrote:
>> Incoming DATA_FIN headers need to propagate the presence of the DATA_FIN
>> bit and the associated sequence number to the MPTCP layer, even when
>> arriving on a bare ACK that does not get added to the receive queue. Add
>> structure members to store the DATA_FIN information and helpers to set
>> and check those values.
>>
>> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
>> ---
>> net/mptcp/options.c | 16 +++++++
>> net/mptcp/protocol.c | 100 ++++++++++++++++++++++++++++++++++++++-----
>> net/mptcp/protocol.h | 3 ++
>> 3 files changed, 109 insertions(+), 10 deletions(-)
>>
>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
>> index 756a636d3d3a..db34306eb73f 100644
>> --- a/net/mptcp/options.c
>> +++ b/net/mptcp/options.c
>> @@ -775,6 +775,22 @@ static void update_una(struct mptcp_sock *msk,
>> }
>> }
>>
>> +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
>> +{
>> + /* Skip if DATA_FIN was already received.
>> + * If updating simultaneously with the recvmsg loop, values
>> + * should match. If they mismatch, the peer is misbehaving and
>> + * we will prefer the most recent information.
>> + */
>> + if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
>> + return false;
>> +
>> + WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq);
>> + WRITE_ONCE(msk->rcv_data_fin, 1);
>> +
>> + return true;
>> +}
>> +
>> static bool add_addr_hmac_valid(struct mptcp_sock *msk,
>> struct mptcp_options_received *mp_opt)
>> {
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index 69100187490f..43e48327902e 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -16,6 +16,7 @@
>> #include <net/inet_hashtables.h>
>> #include <net/protocol.h>
>> #include <net/tcp.h>
>> +#include <net/tcp_states.h>
>> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>> #include <net/transp_v6.h>
>> #endif
>> @@ -163,6 +164,95 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
>> return mptcp_subflow_data_available(ssk);
>> }
>>
>> +static bool mptcp_pending_data_fin(struct sock *sk)
>> +{
>> + struct mptcp_sock *msk = mptcp_sk(sk);
>> +
>> + return (READ_ONCE(msk->rcv_data_fin) &&
>> + ((1 << sk->sk_state) &
>> + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)) &&
>> + msk->ack_seq == READ_ONCE(msk->rcv_data_fin_seq));
>> +}
>> +
>> +static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
>> +{
>> + long tout = ssk && inet_csk(ssk)->icsk_pending ?
>> + inet_csk(ssk)->icsk_timeout - jiffies : 0;
>> +
>> + if (tout <= 0)
>> + tout = mptcp_sk(sk)->timer_ival;
>> + mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
>> +}
>> +
>> +static void mptcp_check_data_fin(struct sock *sk)
>> +{
>> + struct mptcp_sock *msk = mptcp_sk(sk);
>> +
>> + if (__mptcp_check_fallback(msk) || !msk->first)
>> + return;
>> +
>> + /* Need to ack a DATA_FIN received from a peer while this side
>> + * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
>> + * msk->rcv_data_fin was set when parsing the incoming options
>> + * at the subflow level and the msk lock was not held, so this
>> + * is the first opportunity to act on the DATA_FIN and change
>> + * the msk state.
>> + *
>> + * If we are caught up to the sequence number of the incoming
>> + * DATA_FIN, send the DATA_ACK now and do state transition. If
>> + * not caught up, do nothing and let the recv code send DATA_ACK
>> + * when catching up.
>> + */
>> +
>> + if (READ_ONCE(msk->rcv_data_fin) &&
>> + ((1 << sk->sk_state) &
>> + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
>> + u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
>> + struct mptcp_subflow_context *subflow;
>> +
>> + if (msk->ack_seq + 1 == rcv_data_fin_seq) {
>
> This is quite alike mptcp_pending_data_fin(), so perhaps the helper
> could be reused here? see also the comment on patch 8/12.
>
Right, I'll fix that.
>> + msk->ack_seq++;
>> + WRITE_ONCE(msk->rcv_data_fin, 0);
>> +
>> + sk->sk_shutdown |= RCV_SHUTDOWN;
>> +
>> + switch (sk->sk_state) {
>> + case TCP_ESTABLISHED:
>> + inet_sk_state_store(sk, TCP_CLOSE_WAIT);
>> + break;
>> + case TCP_FIN_WAIT1:
>> + inet_sk_state_store(sk, TCP_CLOSING);
>> + break;
>> + case TCP_FIN_WAIT2:
>> + inet_sk_state_store(sk, TCP_CLOSE);
>> + // @@ Close subflows now?
>> + break;
>> + default:
>> + /* Other states not expected */
>> + WARN_ON_ONCE(1);
>> + break;
>
> it's a pity we can't use mptcp_close_state() here.
Yeah, this spot is more like tcp_fin(), which has different transitions
from tcp_close_state(). Taking a second look at that, the WARN_ON_ONCE(1)
doesn't seem right, since there may be retransmissions of DATA_FIN in a
few other states.
>
>> + }
>> +
>> + mptcp_set_timeout(sk, NULL);
>> + mptcp_for_each_subflow(msk, subflow) {
>> + struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
>> +
>> + lock_sock(ssk);
>> + tcp_send_ack(ssk);
>> + release_sock(ssk);
>> + }
>> +
>> + sk->sk_state_change(sk);
>> +
>> + if (sk->sk_shutdown == SHUTDOWN_MASK ||
>> + sk->sk_state == TCP_CLOSE)
>> + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
>> + else
>> + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
>> + }
>> + }
>> +}
>> +
>> static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
>> struct sock *ssk,
>> unsigned int *bytes)
>> @@ -303,16 +393,6 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk)
>> spin_unlock_bh(&msk->join_list_lock);
>> }
>>
>> -static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
>> -{
>> - long tout = ssk && inet_csk(ssk)->icsk_pending ?
>> - inet_csk(ssk)->icsk_timeout - jiffies : 0;
>> -
>> - if (tout <= 0)
>> - tout = mptcp_sk(sk)->timer_ival;
>> - mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
>> -}
>> -
>> static bool mptcp_timer_pending(struct sock *sk)
>> {
>> return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index 08198b61b8a6..813c9d5c59f2 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -193,11 +193,13 @@ struct mptcp_sock {
>> u64 remote_key;
>> u64 write_seq;
>> u64 ack_seq;
>> + u64 rcv_data_fin_seq;
>> atomic64_t snd_una;
>> unsigned long timer_ival;
>> u32 token;
>> unsigned long flags;
>> bool can_ack;
>> + bool rcv_data_fin;
>> spinlock_t join_list_lock;
>> struct work_struct work;
>> struct list_head conn_list;
>
> Note: we should revisit mptcp_sock binary layout someday ;)
Agreed!
--
Mat Martineau
Intel
next reply other threads:[~2020-07-17 18:28 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-17 18:28 Mat Martineau [this message]
-- strict thread matches above, loose matches on Subject: below --
2020-07-16 13:33 [MPTCP] Re: [PATCH mptcp-next 04/12] mptcp: Track received DATA_FIN sequence number and add related helpers Paolo Abeni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=alpine.OSX.2.23.453.2007171107510.39222@kdpetti-mobl.amr.corp.intel.com \
--to=unknown@example.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.