All of lore.kernel.org
 help / color / mirror / Atom feed
* [MPTCP] Re: [PATCH mptcp-next 04/12] mptcp: Track received DATA_FIN sequence number and add related helpers
@ 2020-07-17 18:28 Mat Martineau
  0 siblings, 0 replies; 2+ messages in thread
From: Mat Martineau @ 2020-07-17 18:28 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 6991 bytes --]

On Thu, 16 Jul 2020, Paolo Abeni wrote:

> On Wed, 2020-07-15 at 17:20 -0700, Mat Martineau wrote:
>> Incoming DATA_FIN headers need to propagate the presence of the DATA_FIN
>> bit and the associated sequence number to the MPTCP layer, even when
>> arriving on a bare ACK that does not get added to the receive queue. Add
>> structure members to store the DATA_FIN information and helpers to set
>> and check those values.
>>
>> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
>> ---
>>  net/mptcp/options.c  |  16 +++++++
>>  net/mptcp/protocol.c | 100 ++++++++++++++++++++++++++++++++++++++-----
>>  net/mptcp/protocol.h |   3 ++
>>  3 files changed, 109 insertions(+), 10 deletions(-)
>>
>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
>> index 756a636d3d3a..db34306eb73f 100644
>> --- a/net/mptcp/options.c
>> +++ b/net/mptcp/options.c
>> @@ -775,6 +775,22 @@ static void update_una(struct mptcp_sock *msk,
>>  	}
>>  }
>>
>> +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
>> +{
>> +	/* Skip if DATA_FIN was already received.
>> +	 * If updating simultaneously with the recvmsg loop, values
>> +	 * should match. If they mismatch, the peer is misbehaving and
>> +	 * we will prefer the most recent information.
>> +	 */
>> +	if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
>> +		return false;
>> +
>> +	WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq);
>> +	WRITE_ONCE(msk->rcv_data_fin, 1);
>> +
>> +	return true;
>> +}
>> +
>>  static bool add_addr_hmac_valid(struct mptcp_sock *msk,
>>  				struct mptcp_options_received *mp_opt)
>>  {
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index 69100187490f..43e48327902e 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -16,6 +16,7 @@
>>  #include <net/inet_hashtables.h>
>>  #include <net/protocol.h>
>>  #include <net/tcp.h>
>> +#include <net/tcp_states.h>
>>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>>  #include <net/transp_v6.h>
>>  #endif
>> @@ -163,6 +164,95 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
>>  	return mptcp_subflow_data_available(ssk);
>>  }
>>
>> +static bool mptcp_pending_data_fin(struct sock *sk)
>> +{
>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>> +
>> +	return (READ_ONCE(msk->rcv_data_fin) &&
>> +		((1 << sk->sk_state) &
>> +		 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)) &&
>> +		msk->ack_seq == READ_ONCE(msk->rcv_data_fin_seq));
>> +}
>> +
>> +static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
>> +{
>> +	long tout = ssk && inet_csk(ssk)->icsk_pending ?
>> +				      inet_csk(ssk)->icsk_timeout - jiffies : 0;
>> +
>> +	if (tout <= 0)
>> +		tout = mptcp_sk(sk)->timer_ival;
>> +	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
>> +}
>> +
>> +static void mptcp_check_data_fin(struct sock *sk)
>> +{
>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>> +
>> +	if (__mptcp_check_fallback(msk) || !msk->first)
>> +		return;
>> +
>> +	/* Need to ack a DATA_FIN received from a peer while this side
>> +	 * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
>> +	 * msk->rcv_data_fin was set when parsing the incoming options
>> +	 * at the subflow level and the msk lock was not held, so this
>> +	 * is the first opportunity to act on the DATA_FIN and change
>> +	 * the msk state.
>> +	 *
>> +	 * If we are caught up to the sequence number of the incoming
>> +	 * DATA_FIN, send the DATA_ACK now and do state transition.  If
>> +	 * not caught up, do nothing and let the recv code send DATA_ACK
>> +	 * when catching up.
>> +	 */
>> +
>> +	if (READ_ONCE(msk->rcv_data_fin) &&
>> +	    ((1 << sk->sk_state) &
>> +	     (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
>> +		u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
>> +		struct mptcp_subflow_context *subflow;
>> +
>> +		if (msk->ack_seq + 1 == rcv_data_fin_seq) {
>
> This is quite alike mptcp_pending_data_fin(), so perhaps the helper
> could be reused here? see also the comment on patch 8/12.
>

Right, I'll fix that.

>> +			msk->ack_seq++;
>> +			WRITE_ONCE(msk->rcv_data_fin, 0);
>> +
>> +			sk->sk_shutdown |= RCV_SHUTDOWN;
>> +
>> +			switch (sk->sk_state) {
>> +			case TCP_ESTABLISHED:
>> +				inet_sk_state_store(sk, TCP_CLOSE_WAIT);
>> +				break;
>> +			case TCP_FIN_WAIT1:
>> +				inet_sk_state_store(sk, TCP_CLOSING);
>> +				break;
>> +			case TCP_FIN_WAIT2:
>> +				inet_sk_state_store(sk, TCP_CLOSE);
>> +				// @@ Close subflows now?
>> +				break;
>> +			default:
>> +				/* Other states not expected */
>> +				WARN_ON_ONCE(1);
>> +				break;
>
> it's a pity we can't use mptcp_close_state() here.

Yeah, this spot is more like tcp_fin(), which has different transitions 
from tcp_close_state(). Taking a second look at that, the WARN_ON_ONCE(1) 
doesn't seem right, since there may be retransmissions of DATA_FIN in a 
few other states.

>
>> +			}
>> +
>> +			mptcp_set_timeout(sk, NULL);
>> +			mptcp_for_each_subflow(msk, subflow) {
>> +				struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
>> +
>> +				lock_sock(ssk);
>> +				tcp_send_ack(ssk);
>> +				release_sock(ssk);
>> +			}
>> +
>> +			sk->sk_state_change(sk);
>> +
>> +			if (sk->sk_shutdown == SHUTDOWN_MASK ||
>> +			    sk->sk_state == TCP_CLOSE)
>> +				sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
>> +			else
>> +				sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
>> +		}
>> +	}
>> +}
>> +
>>  static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
>>  					   struct sock *ssk,
>>  					   unsigned int *bytes)
>> @@ -303,16 +393,6 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk)
>>  	spin_unlock_bh(&msk->join_list_lock);
>>  }
>>
>> -static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
>> -{
>> -	long tout = ssk && inet_csk(ssk)->icsk_pending ?
>> -				      inet_csk(ssk)->icsk_timeout - jiffies : 0;
>> -
>> -	if (tout <= 0)
>> -		tout = mptcp_sk(sk)->timer_ival;
>> -	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
>> -}
>> -
>>  static bool mptcp_timer_pending(struct sock *sk)
>>  {
>>  	return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index 08198b61b8a6..813c9d5c59f2 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -193,11 +193,13 @@ struct mptcp_sock {
>>  	u64		remote_key;
>>  	u64		write_seq;
>>  	u64		ack_seq;
>> +	u64		rcv_data_fin_seq;
>>  	atomic64_t	snd_una;
>>  	unsigned long	timer_ival;
>>  	u32		token;
>>  	unsigned long	flags;
>>  	bool		can_ack;
>> +	bool		rcv_data_fin;
>>  	spinlock_t	join_list_lock;
>>  	struct work_struct work;
>>  	struct list_head conn_list;
>
> Note: we should revisit mptcp_sock binary layout someday ;)

Agreed!

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [MPTCP] Re: [PATCH mptcp-next 04/12] mptcp: Track received DATA_FIN sequence number and add related helpers
@ 2020-07-16 13:33 Paolo Abeni
  0 siblings, 0 replies; 2+ messages in thread
From: Paolo Abeni @ 2020-07-16 13:33 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 6455 bytes --]

On Wed, 2020-07-15 at 17:20 -0700, Mat Martineau wrote:
> Incoming DATA_FIN headers need to propagate the presence of the DATA_FIN
> bit and the associated sequence number to the MPTCP layer, even when
> arriving on a bare ACK that does not get added to the receive queue. Add
> structure members to store the DATA_FIN information and helpers to set
> and check those values.
> 
> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
> ---
>  net/mptcp/options.c  |  16 +++++++
>  net/mptcp/protocol.c | 100 ++++++++++++++++++++++++++++++++++++++-----
>  net/mptcp/protocol.h |   3 ++
>  3 files changed, 109 insertions(+), 10 deletions(-)
> 
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index 756a636d3d3a..db34306eb73f 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -775,6 +775,22 @@ static void update_una(struct mptcp_sock *msk,
>  	}
>  }
>  
> +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
> +{
> +	/* Skip if DATA_FIN was already received.
> +	 * If updating simultaneously with the recvmsg loop, values
> +	 * should match. If they mismatch, the peer is misbehaving and
> +	 * we will prefer the most recent information.
> +	 */
> +	if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
> +		return false;
> +
> +	WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq);
> +	WRITE_ONCE(msk->rcv_data_fin, 1);
> +
> +	return true;
> +}
> +
>  static bool add_addr_hmac_valid(struct mptcp_sock *msk,
>  				struct mptcp_options_received *mp_opt)
>  {
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 69100187490f..43e48327902e 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -16,6 +16,7 @@
>  #include <net/inet_hashtables.h>
>  #include <net/protocol.h>
>  #include <net/tcp.h>
> +#include <net/tcp_states.h>
>  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
>  #include <net/transp_v6.h>
>  #endif
> @@ -163,6 +164,95 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
>  	return mptcp_subflow_data_available(ssk);
>  }
>  
> +static bool mptcp_pending_data_fin(struct sock *sk)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +
> +	return (READ_ONCE(msk->rcv_data_fin) &&
> +		((1 << sk->sk_state) &
> +		 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)) &&
> +		msk->ack_seq == READ_ONCE(msk->rcv_data_fin_seq));
> +}
> +
> +static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
> +{
> +	long tout = ssk && inet_csk(ssk)->icsk_pending ?
> +				      inet_csk(ssk)->icsk_timeout - jiffies : 0;
> +
> +	if (tout <= 0)
> +		tout = mptcp_sk(sk)->timer_ival;
> +	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
> +}
> +
> +static void mptcp_check_data_fin(struct sock *sk)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +
> +	if (__mptcp_check_fallback(msk) || !msk->first)
> +		return;
> +
> +	/* Need to ack a DATA_FIN received from a peer while this side
> +	 * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
> +	 * msk->rcv_data_fin was set when parsing the incoming options
> +	 * at the subflow level and the msk lock was not held, so this
> +	 * is the first opportunity to act on the DATA_FIN and change
> +	 * the msk state.
> +	 *
> +	 * If we are caught up to the sequence number of the incoming
> +	 * DATA_FIN, send the DATA_ACK now and do state transition.  If
> +	 * not caught up, do nothing and let the recv code send DATA_ACK
> +	 * when catching up.
> +	 */
> +
> +	if (READ_ONCE(msk->rcv_data_fin) &&
> +	    ((1 << sk->sk_state) &
> +	     (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
> +		u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
> +		struct mptcp_subflow_context *subflow;
> +
> +		if (msk->ack_seq + 1 == rcv_data_fin_seq) {

This is quite alike mptcp_pending_data_fin(), so perhaps the helper
could be reused here? see also the comment on patch 8/12.

> +			msk->ack_seq++;
> +			WRITE_ONCE(msk->rcv_data_fin, 0);
> +
> +			sk->sk_shutdown |= RCV_SHUTDOWN;
> +
> +			switch (sk->sk_state) {
> +			case TCP_ESTABLISHED:
> +				inet_sk_state_store(sk, TCP_CLOSE_WAIT);
> +				break;
> +			case TCP_FIN_WAIT1:
> +				inet_sk_state_store(sk, TCP_CLOSING);
> +				break;
> +			case TCP_FIN_WAIT2:
> +				inet_sk_state_store(sk, TCP_CLOSE);
> +				// @@ Close subflows now?
> +				break;
> +			default:
> +				/* Other states not expected */
> +				WARN_ON_ONCE(1);
> +				break;

it's a pity we can't use mptcp_close_state() here.

> +			}
> +
> +			mptcp_set_timeout(sk, NULL);
> +			mptcp_for_each_subflow(msk, subflow) {
> +				struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> +
> +				lock_sock(ssk);
> +				tcp_send_ack(ssk);
> +				release_sock(ssk);
> +			}
> +
> +			sk->sk_state_change(sk);
> +
> +			if (sk->sk_shutdown == SHUTDOWN_MASK ||
> +			    sk->sk_state == TCP_CLOSE)
> +				sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
> +			else
> +				sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
> +		}
> +	}
> +}
> +
>  static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
>  					   struct sock *ssk,
>  					   unsigned int *bytes)
> @@ -303,16 +393,6 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk)
>  	spin_unlock_bh(&msk->join_list_lock);
>  }
>  
> -static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
> -{
> -	long tout = ssk && inet_csk(ssk)->icsk_pending ?
> -				      inet_csk(ssk)->icsk_timeout - jiffies : 0;
> -
> -	if (tout <= 0)
> -		tout = mptcp_sk(sk)->timer_ival;
> -	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
> -}
> -
>  static bool mptcp_timer_pending(struct sock *sk)
>  {
>  	return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 08198b61b8a6..813c9d5c59f2 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -193,11 +193,13 @@ struct mptcp_sock {
>  	u64		remote_key;
>  	u64		write_seq;
>  	u64		ack_seq;
> +	u64		rcv_data_fin_seq;
>  	atomic64_t	snd_una;
>  	unsigned long	timer_ival;
>  	u32		token;
>  	unsigned long	flags;
>  	bool		can_ack;
> +	bool		rcv_data_fin;
>  	spinlock_t	join_list_lock;
>  	struct work_struct work;
>  	struct list_head conn_list;

Note: we should revisit mptcp_sock binary layout someday ;)

/P


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-07-17 18:28 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-17 18:28 [MPTCP] Re: [PATCH mptcp-next 04/12] mptcp: Track received DATA_FIN sequence number and add related helpers Mat Martineau
  -- strict thread matches above, loose matches on Subject: below --
2020-07-16 13:33 Paolo Abeni

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.