All of lore.kernel.org
 help / color / mirror / Atom feed
* [MPTCP] [PRE-RFC 5/6] mptcp: use retransmission timer to update msk una
@ 2019-08-16 16:48 Paolo Abeni
  0 siblings, 0 replies; 2+ messages in thread
From: Paolo Abeni @ 2019-08-16 16:48 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 6207 bytes --]

plugging unacked sequence update into sendmsg() is not enough;
use the msk icsk retransmit timer to trigger una updates. The
timer freqeuncy is based on the current subflow RTO estimation.

The timer is clearer for good at destroy() time

Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
---
 net/mptcp/protocol.c | 116 ++++++++++++++++++++++++++++++++++++++++++-
 net/mptcp/protocol.h |   1 +
 2 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index dcfffb71b545..5eba4e65ca93 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -17,6 +17,33 @@
 #include <net/mptcp.h>
 #include "protocol.h"
 
+static unsigned long mptcp_timeout(const struct sock *sk,
+				   const struct sock *ssk)
+{
+	unsigned long tout = inet_csk(ssk)->icsk_pending ?
+			     inet_csk(ssk)->icsk_timeout - jiffies : 0;
+
+	if (!tout)
+		tout = inet_csk(sk)->icsk_timeout;
+	return tout ?: TCP_RTO_MIN;
+}
+
+static void mptcp_reset_timer(struct sock *sk, unsigned long tout)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout);
+	icsk->icsk_timeout = tout;
+}
+
+static void mptcp_stop_timer(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
+	icsk->icsk_timeout = 0;
+}
+
 static struct socket *__mptcp_fallback_get_ref(const struct mptcp_sock *msk)
 {
 	sock_owned_by_me((const struct sock *)msk);
@@ -78,6 +105,7 @@ static inline bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
 static u64 mptcp_update_msk_una(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct mptcp_data_frag *dtmp, *dfrag;
 	struct subflow_context *subflow;
 	u64 snd_una = msk->snd_una;
 
@@ -87,7 +115,19 @@ static u64 mptcp_update_msk_una(struct sock *sk)
 		if (after64(subflow_snd_una, snd_una))
 			snd_una = subflow_snd_una;
 	}
-	msk->snd_una = snd_una;
+
+	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
+		if (dfrag->data_seq + dfrag->data_len > snd_una)
+			break;
+
+		list_del(&dfrag->list);
+		put_page(dfrag->page);
+	}
+
+	if (msk->snd_una != snd_una) {
+		msk->snd_una = snd_una;
+		msk->snd_una_tstamp = jiffies;
+	}
 	return snd_una;
 }
 
@@ -98,8 +138,15 @@ static void mptcp_update_subflow_una(struct sock *sk, struct sock *ssk)
 	u64 snd_una = msk->snd_una;
 
 	/* heuristic to avoid doing the expensive data ack update frequently */
-	if (snd_una + tcp_sk(sk)->snd_cwnd <= msk->write_seq)
+	if (snd_una + tcp_sk(sk)->snd_cwnd <= msk->write_seq) {
+		u64 old_snd_una = snd_una;
+
 		snd_una = mptcp_update_msk_una(sk);
+		if (snd_una == msk->write_seq)
+			mptcp_stop_timer(sk);
+		else if (snd_una != old_snd_una)
+			mptcp_reset_timer(sk, mptcp_timeout(sk, ssk));
+	}
 
 	/* if we msk just switched to this subflow, update subflow's una
 	 * to allow 32 bit ack update to be more accurate
@@ -314,9 +361,15 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		copied += ret;
 	}
 	if (copied) {
+		struct inet_connection_sock *icsk = inet_csk(sk);
+
 		ret = copied;
 		tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
 			 size_goal);
+
+		/* start the timer, if it's not pending */
+		if (!icsk->icsk_timeout)
+			mptcp_reset_timer(sk, mptcp_timeout(sk, ssk));
 	}
 
 	release_sock(ssk);
@@ -667,6 +720,36 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	return copied;
 }
 
+static void mptcp_retransmit_handler(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_update_msk_una(sk);
+
+	if (msk->snd_una == msk->write_seq)
+		mptcp_stop_timer(sk);
+	else
+		mptcp_reset_timer(sk, msk->sk.icsk_timeout);
+}
+
+static void mptcp_retransmit_timer(struct timer_list *t)
+{
+	struct inet_connection_sock *icsk = from_timer(icsk, t,
+						       icsk_retransmit_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
+
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk)) {
+		mptcp_retransmit_handler(sk);
+	} else {
+		/* delegate our work to tcp_release_cb() */
+		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED,
+				      &sk->sk_tsq_flags))
+			sock_hold(sk);
+	}
+	bh_unlock_sock(sk);
+}
+
 static int __mptcp_init_sock(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -676,6 +759,9 @@ static int __mptcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&msk->conn_list);
 	INIT_LIST_HEAD(&msk->rtx_queue);
 
+	/* re-use the csk retrans timer for MPTCP-level retrans */
+	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
+
 	return 0;
 }
 
@@ -798,6 +884,8 @@ static void mptcp_destroy(struct sock *sk)
 
 	token_destroy(msk->token);
 
+	sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+
 	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
 		list_del(&dfrag->list);
 		put_page(dfrag->page);
@@ -858,6 +946,30 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
 	return 0;
 }
 
+#define MPTCP_DEFERRED_ALL TCPF_WRITE_TIMER_DEFERRED
+
+/* this is very alike tcp_release_cb() but we must handle differently a
+ * different set of events
+ */
+void mptcp_release_cb(struct sock *sk)
+{
+	unsigned long flags, nflags;
+
+	do {
+		flags = sk->sk_tsq_flags;
+		if (!(flags & MPTCP_DEFERRED_ALL))
+			return;
+		nflags = flags & ~MPTCP_DEFERRED_ALL;
+	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
+
+	sock_release_ownership(sk);
+
+	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
+		mptcp_retransmit_handler(sk);
+		__sock_put(sk);
+	}
+}
+
 static int mptcp_get_port(struct sock *sk, unsigned short snum)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 889974a4022c..0a6c7c39fc8b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -125,6 +125,7 @@ struct mptcp_sock {
 	u64		write_seq;
 	u64		ack_seq;
 	u64		snd_una;
+	unsigned long	snd_una_tstamp;
 	u32		token;
 	u16		dport;
 	struct list_head conn_list;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [MPTCP] [PRE-RFC 5/6] mptcp: use retransmission timer to update msk una
@ 2019-08-19 11:44 Florian Westphal
  0 siblings, 0 replies; 2+ messages in thread
From: Florian Westphal @ 2019-08-19 11:44 UTC (permalink / raw)
  To: mptcp

[-- Attachment #1: Type: text/plain, Size: 391 bytes --]

Paolo Abeni <pabeni(a)redhat.com> wrote:
> plugging unacked sequence update into sendmsg() is not enough;
> use the msk icsk retransmit timer to trigger una updates. The
> timer freqeuncy is based on the current subflow RTO estimation.

Looks fine to me, I think we will need the timer in any
case to trigger mptcp level retrans so might as well handle the
una update there as well.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-08-19 11:44 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-16 16:48 [MPTCP] [PRE-RFC 5/6] mptcp: use retransmission timer to update msk una Paolo Abeni
2019-08-19 11:44 Florian Westphal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.