* [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf
@ 2021-06-16 9:29 Paolo Abeni
2021-06-17 0:13 ` Mat Martineau
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Paolo Abeni @ 2021-06-16 9:29 UTC (permalink / raw)
To: mptcp
The current cleanup rbuf tries a bit too hard to avoid acquiring
the subflow socket lock. We may end up delaying the needed ack,
or skip acking a blocked subflow.
Address the above by extending the conditions used to trigger the cleanup
to reflect more closely what TCP does and invoking tcp_cleanup_rbuf()
on all the active subflows.
Note that we can't replicate the exact tests implemented in
tcp_cleanup_rbuf(), as MPTCP lacks some of the required info - e.g.
ping-pong mode.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v2 -> v3:
- mptcp_subflow_cleanup_rbuf() returns void
v1 -> v2:
- access ssk icsk/tp state instead of msk (Mat)
---
net/mptcp/protocol.c | 56 ++++++++++++++++++--------------------------
net/mptcp/protocol.h | 1 -
2 files changed, 23 insertions(+), 34 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0a220862f62d..05c8382aafef 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -442,49 +442,46 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
}
}
-static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
bool slow;
- int ret;
slow = lock_sock_fast(ssk);
- ret = tcp_can_send_ack(ssk);
- if (ret)
+ if (tcp_can_send_ack(ssk))
tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
- return ret;
+}
+
+static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
+{
+ const struct inet_connection_sock *icsk = inet_csk(ssk);
+ bool ack_pending = READ_ONCE(icsk->icsk_ack.pending);
+ const struct tcp_sock *tp = tcp_sk(ssk);
+
+ return (ack_pending & ICSK_ACK_SCHED) &&
+ ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
+ READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
+ (rx_empty && ack_pending &
+ (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
- struct sock *ack_hint = READ_ONCE(msk->ack_hint);
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
- bool cleanup;
+ int space = __mptcp_space(sk);
+ bool cleanup, rx_empty;
- /* this is a simple superset of what tcp_cleanup_rbuf() implements
- * so that we don't have to acquire the ssk socket lock most of the time
- * to do actually nothing
- */
- cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
- if (!cleanup)
- return;
+ cleanup = (space > 0) && (space >= (old_space << 1));
+ rx_empty = !atomic_read(&sk->sk_rmem_alloc);
- /* if the hinted ssk is still active, try to use it */
- if (likely(ack_hint)) {
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
- return;
- }
+ if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
+ mptcp_subflow_cleanup_rbuf(ssk);
}
-
- /* otherwise pick the first active subflow */
- mptcp_for_each_subflow(msk, subflow)
- if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
- return;
}
static bool mptcp_check_data_fin(struct sock *sk)
@@ -629,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
break;
}
} while (more_data_avail);
- WRITE_ONCE(msk->ack_hint, ssk);
*bytes += moved;
return done;
@@ -1910,7 +1906,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
__mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
- tcp_cleanup_rbuf(ssk, moved);
if (unlikely(ssk->sk_err))
__mptcp_error_report(sk);
@@ -1926,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
- mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@@ -2175,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk == msk->last_snd)
msk->last_snd = NULL;
- if (ssk == msk->ack_hint)
- msk->ack_hint = NULL;
-
if (ssk == msk->first)
msk->first = NULL;
@@ -2392,7 +2383,6 @@ static int __mptcp_init_sock(struct sock *sk)
msk->rmem_released = 0;
msk->tx_pending_data = 0;
- msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 41f2957bf50c..2480db50cbd2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -243,7 +243,6 @@ struct mptcp_sock {
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
spinlock_t join_list_lock;
- struct sock *ack_hint;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
--
2.26.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf
2021-06-16 9:29 [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf Paolo Abeni
@ 2021-06-17 0:13 ` Mat Martineau
2021-06-17 13:28 ` Matthieu Baerts
2021-06-18 17:35 ` Paolo Abeni
2 siblings, 0 replies; 5+ messages in thread
From: Mat Martineau @ 2021-06-17 0:13 UTC (permalink / raw)
To: Paolo Abeni; +Cc: mptcp
On Wed, 16 Jun 2021, Paolo Abeni wrote:
> The current cleanup rbuf tries a bit too hard to avoid acquiring
> the subflow socket lock. We may end-up delaying the needed ack,
> or skip acking a blocked subflow.
>
> Address the above extending the conditions used to trigger the cleanup
> to reflect more closely what TCP does and invoking tcp_cleanup_rbuf()
> on all the active subflows.
>
> Note that we can't replicate the exact tests implemented in
> tcp_cleanup_rbuf(), as MPTCP lacks some of the required info - e.g.
> ping-pong mode.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v2 -> v3:
> - mptcp_subflow_cleanup_rbuf() returns void
Thanks for the v3, this looks ready to apply.
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
> v1 -> v2:
> - access ssk icsk/tp state instead of msk (Mat)
> ---
> net/mptcp/protocol.c | 56 ++++++++++++++++++--------------------------
> net/mptcp/protocol.h | 1 -
> 2 files changed, 23 insertions(+), 34 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 0a220862f62d..05c8382aafef 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -442,49 +442,46 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
> }
> }
>
> -static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
> +static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
> {
> bool slow;
> - int ret;
>
> slow = lock_sock_fast(ssk);
> - ret = tcp_can_send_ack(ssk);
> - if (ret)
> + if (tcp_can_send_ack(ssk))
> tcp_cleanup_rbuf(ssk, 1);
> unlock_sock_fast(ssk, slow);
> - return ret;
> +}
> +
> +static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
> +{
> + const struct inet_connection_sock *icsk = inet_csk(ssk);
> + bool ack_pending = READ_ONCE(icsk->icsk_ack.pending);
> + const struct tcp_sock *tp = tcp_sk(ssk);
> +
> + return (ack_pending & ICSK_ACK_SCHED) &&
> + ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
> + READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
> + (rx_empty && ack_pending &
> + (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
> }
>
> static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
> {
> - struct sock *ack_hint = READ_ONCE(msk->ack_hint);
> int old_space = READ_ONCE(msk->old_wspace);
> struct mptcp_subflow_context *subflow;
> struct sock *sk = (struct sock *)msk;
> - bool cleanup;
> + int space = __mptcp_space(sk);
> + bool cleanup, rx_empty;
>
> - /* this is a simple superset of what tcp_cleanup_rbuf() implements
> - * so that we don't have to acquire the ssk socket lock most of the time
> - * to do actually nothing
> - */
> - cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
> - if (!cleanup)
> - return;
> + cleanup = (space > 0) && (space >= (old_space << 1));
> + rx_empty = !atomic_read(&sk->sk_rmem_alloc);
>
> - /* if the hinted ssk is still active, try to use it */
> - if (likely(ack_hint)) {
> - mptcp_for_each_subflow(msk, subflow) {
> - struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> + mptcp_for_each_subflow(msk, subflow) {
> + struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
>
> - if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
> - return;
> - }
> + if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
> + mptcp_subflow_cleanup_rbuf(ssk);
> }
> -
> - /* otherwise pick the first active subflow */
> - mptcp_for_each_subflow(msk, subflow)
> - if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
> - return;
> }
>
> static bool mptcp_check_data_fin(struct sock *sk)
> @@ -629,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
> break;
> }
> } while (more_data_avail);
> - WRITE_ONCE(msk->ack_hint, ssk);
>
> *bytes += moved;
> return done;
> @@ -1910,7 +1906,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
> __mptcp_update_rmem(sk);
> done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
> mptcp_data_unlock(sk);
> - tcp_cleanup_rbuf(ssk, moved);
>
> if (unlikely(ssk->sk_err))
> __mptcp_error_report(sk);
> @@ -1926,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
> ret |= __mptcp_ofo_queue(msk);
> __mptcp_splice_receive_queue(sk);
> mptcp_data_unlock(sk);
> - mptcp_cleanup_rbuf(msk);
> }
> if (ret)
> mptcp_check_data_fin((struct sock *)msk);
> @@ -2175,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> if (ssk == msk->last_snd)
> msk->last_snd = NULL;
>
> - if (ssk == msk->ack_hint)
> - msk->ack_hint = NULL;
> -
> if (ssk == msk->first)
> msk->first = NULL;
>
> @@ -2392,7 +2383,6 @@ static int __mptcp_init_sock(struct sock *sk)
> msk->rmem_released = 0;
> msk->tx_pending_data = 0;
>
> - msk->ack_hint = NULL;
> msk->first = NULL;
> inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
> WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 41f2957bf50c..2480db50cbd2 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -243,7 +243,6 @@ struct mptcp_sock {
> bool use_64bit_ack; /* Set when we received a 64-bit DSN */
> bool csum_enabled;
> spinlock_t join_list_lock;
> - struct sock *ack_hint;
> struct work_struct work;
> struct sk_buff *ooo_last_skb;
> struct rb_root out_of_order_queue;
> --
> 2.26.3
>
>
>
--
Mat Martineau
Intel
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf
2021-06-16 9:29 [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf Paolo Abeni
2021-06-17 0:13 ` Mat Martineau
@ 2021-06-17 13:28 ` Matthieu Baerts
2021-06-18 17:35 ` Paolo Abeni
2 siblings, 0 replies; 5+ messages in thread
From: Matthieu Baerts @ 2021-06-17 13:28 UTC (permalink / raw)
To: Paolo Abeni, mptcp
Hi Paolo, Mat,
On 16/06/2021 11:29, Paolo Abeni wrote:
> The current cleanup rbuf tries a bit too hard to avoid acquiring
> the subflow socket lock. We may end-up delaying the needed ack,
> or skip acking a blocked subflow.
>
> Address the above extending the conditions used to trigger the cleanup
> to reflect more closely what TCP does and invoking tcp_cleanup_rbuf()
> on all the active subflows.
>
> Note that we can't replicate the exact tests implemented in
> tcp_cleanup_rbuf(), as MPTCP lacks some of the required info - e.g.
> ping-pong mode.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Thank you for the patch and the review!
Now in our tree (feature for net-next) with Mat's RvB tag:
- 7c5442f3e2bf: mptcp: refine mptcp_cleanup_rbuf
- Results: 8db480094258..f40f6ff5a355
Builds and tests are now in progress:
https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20210617T132720
https://github.com/multipath-tcp/mptcp_net-next/actions/workflows/build-validation.yml?query=branch:export/20210617T132720
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf
2021-06-16 9:29 [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf Paolo Abeni
2021-06-17 0:13 ` Mat Martineau
2021-06-17 13:28 ` Matthieu Baerts
@ 2021-06-18 17:35 ` Paolo Abeni
2021-06-21 10:50 ` Paolo Abeni
2 siblings, 1 reply; 5+ messages in thread
From: Paolo Abeni @ 2021-06-18 17:35 UTC (permalink / raw)
To: mptcp
On Wed, 2021-06-16 at 11:29 +0200, Paolo Abeni wrote:
> The current cleanup rbuf tries a bit too hard to avoid acquiring
> the subflow socket lock. We may end-up delaying the needed ack,
> or skip acking a blocked subflow.
>
> Address the above extending the conditions used to trigger the cleanup
> to reflect more closely what TCP does and invoking tcp_cleanup_rbuf()
> on all the active subflows.
>
> Note that we can't replicate the exact tests implemented in
> tcp_cleanup_rbuf(), as MPTCP lacks some of the required info - e.g.
> ping-pong mode.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v2 -> v3:
> - mptcp_subflow_cleanup_rbuf() returns void
> v1 -> v2:
> - access ssk icsk/tp state instead of msk (Mat)
Darn me! It looks like this patch has some issues that are almost
hidden by mptcp-level retrans. Investigating. I'll try to provide a
squash-to patch.
Meanwhile we should not send this one to netdev.
Thanks!
Paolo
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf
2021-06-18 17:35 ` Paolo Abeni
@ 2021-06-21 10:50 ` Paolo Abeni
0 siblings, 0 replies; 5+ messages in thread
From: Paolo Abeni @ 2021-06-21 10:50 UTC (permalink / raw)
To: mptcp
On Fri, 2021-06-18 at 19:35 +0200, Paolo Abeni wrote:
> On Wed, 2021-06-16 at 11:29 +0200, Paolo Abeni wrote:
> > The current cleanup rbuf tries a bit too hard to avoid acquiring
> > the subflow socket lock. We may end-up delaying the needed ack,
> > or skip acking a blocked subflow.
> >
> > Address the above extending the conditions used to trigger the cleanup
> > to reflect more closely what TCP does and invoking tcp_cleanup_rbuf()
> > on all the active subflows.
> >
> > Note that we can't replicate the exact tests implemented in
> > tcp_cleanup_rbuf(), as MPTCP lacks some of the required info - e.g.
> > ping-pong mode.
> >
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > v2 -> v3:
> > - mptcp_subflow_cleanup_rbuf() returns void
> > v1 -> v2:
> > - access ssk icsk/tp state instead of msk (Mat)
>
> Darn me! it looks like this patch has some issues, that are almost
> hidden by mptcp-level retrans. Investigating. I'll try to provide a
> squash-to patch.
>
> Meanwhile we should not send this one to netdev.
Well, it looks like the issue I observed predates this change. The
point is: if we disable mptcp-level retrans, the mptcp_connect.sh self-test,
run in a loop, hangs on some lossy test-cases (which is unexpected).
I *think* the solution is still a better mptcp_cleanup_rbuf() -
which has been eluding me for several iterations :(
/P
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2021-06-21 10:50 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-16 9:29 [PATCH v3 mptcp-next] mptcp: refine mptcp_cleanup_rbuf Paolo Abeni
2021-06-17 0:13 ` Mat Martineau
2021-06-17 13:28 ` Matthieu Baerts
2021-06-18 17:35 ` Paolo Abeni
2021-06-21 10:50 ` Paolo Abeni
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).