All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: netdev@vger.kernel.org
Cc: dhowells@redhat.com, linux-afs@lists.infradead.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH net-next 25/26] rxrpc: Fix congestion management
Date: Tue, 08 Nov 2022 22:20:33 +0000	[thread overview]
Message-ID: <166794603342.2389296.12363215406403504827.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <166794587113.2389296.16484814996876530222.stgit@warthog.procyon.org.uk>

rxrpc has a problem in its congestion management in that it saves the
congestion window size (cwnd) from one call to another, but if this is 0 at
the time is saved, then the next call may not actually manage to ever
transmit anything.

To this end:

 (1) Don't save cwnd between calls, but rather reset back down to the
     initial cwnd and re-enter slow-start if data transmission is idle for
     more than an RTT.

 (2) Preserve ssthresh instead, as that is a handy estimate of pipe
     capacity.  Knowing roughly when to stop slow start and enter
     congestion avoidance can reduce the tendency to overshoot and drop
     larger amounts of packets when probing.

In future, cwind growth also needs to be constrained when the window isn't
being filled due to being application limited.

Reported-by: Simon Wilkinson <sxw@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---

 include/trace/events/rxrpc.h |    1 +
 net/rxrpc/ar-internal.h      |    9 +++++----
 net/rxrpc/call_accept.c      |    3 ++-
 net/rxrpc/call_object.c      |    7 ++++++-
 net/rxrpc/conn_client.c      |    3 ++-
 net/rxrpc/conn_object.c      |    2 +-
 net/rxrpc/input.c            |   21 ++++++++++++++++++++-
 net/rxrpc/output.c           |    1 +
 net/rxrpc/peer_object.c      |    7 +------
 net/rxrpc/proc.c             |    4 ++--
 net/rxrpc/sendmsg.c          |   22 +++++++++++++++++++---
 11 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index a11de55c3c14..b9886d1df825 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -193,6 +193,7 @@
 	EM(rxrpc_cong_new_low_nack,		" NewLowN") \
 	EM(rxrpc_cong_no_change,		" -") \
 	EM(rxrpc_cong_progress,			" Progres") \
+	EM(rxrpc_cong_idle_reset,		" IdleRes") \
 	EM(rxrpc_cong_retransmit_again,		" ReTxAgn") \
 	EM(rxrpc_cong_rtt_window_end,		" RttWinE") \
 	E_(rxrpc_cong_saw_nack,			" SawNack")
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 775eb91aabb2..6bbe28ecf583 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -332,7 +332,7 @@ struct rxrpc_peer {
 	u32			rto_j;		/* Retransmission timeout in jiffies */
 	u8			backoff;	/* Backoff timeout */
 
-	u8			cong_cwnd;	/* Congestion window size */
+	u8			cong_ssthresh;	/* Congestion slow-start threshold */
 };
 
 /*
@@ -626,6 +626,7 @@ struct rxrpc_call {
 	u16			tx_backoff;	/* Delay to insert due to Tx failure */
 	u8			tx_winsize;	/* Maximum size of Tx window */
 #define RXRPC_TX_MAX_WINDOW	128
+	ktime_t			tx_last_sent;	/* Last time a transmission occurred */
 
 	/* Received data tracking */
 	struct sk_buff_head	recvmsg_queue;	/* Queue of packets ready for recvmsg() */
@@ -687,10 +688,10 @@ struct rxrpc_call {
  * Summary of a new ACK and the changes it made to the Tx buffer packet states.
  */
 struct rxrpc_ack_summary {
+	u16			nr_acks;		/* Number of ACKs in packet */
+	u16			nr_new_acks;		/* Number of new ACKs in packet */
+	u16			nr_rot_new_acks;	/* Number of rotated new ACKs */
 	u8			ack_reason;
-	u8			nr_acks;		/* Number of ACKs in packet */
-	u8			nr_new_acks;		/* Number of new ACKs in packet */
-	u8			nr_rot_new_acks;	/* Number of rotated new ACKs */
 	bool			saw_nacks;		/* Saw NACKs in packet */
 	bool			new_low_nack;		/* T if new low NACK found */
 	bool			retrans_timeo;		/* T if reTx due to timeout happened */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index d8db277d5ebe..48790ee77019 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -324,7 +324,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 	call->security = conn->security;
 	call->security_ix = conn->security_ix;
 	call->peer = rxrpc_get_peer(conn->params.peer);
-	call->cong_cwnd = call->peer->cong_cwnd;
+	call->cong_ssthresh = call->peer->cong_ssthresh;
+	call->tx_last_sent = ktime_get_real();
 	return call;
 }
 
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index aa19daaa487b..1befe22cd301 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -166,7 +166,12 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
 	call->rx_winsize = rxrpc_rx_window_size;
 	call->tx_winsize = 16;
 
-	call->cong_cwnd = 2;
+	if (RXRPC_TX_SMSS > 2190)
+		call->cong_cwnd = 2;
+	else if (RXRPC_TX_SMSS > 1095)
+		call->cong_cwnd = 3;
+	else
+		call->cong_cwnd = 4;
 	call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
 
 	call->rxnet = rxnet;
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 3c9eeb5b750c..f020f308ed9e 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -363,7 +363,8 @@ static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx,
 	if (!cp->peer)
 		goto error;
 
-	call->cong_cwnd = cp->peer->cong_cwnd;
+	call->tx_last_sent = ktime_get_real();
+	call->cong_ssthresh = cp->peer->cong_ssthresh;
 	if (call->cong_cwnd >= call->cong_ssthresh)
 		call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
 	else
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index f7ea71ae6159..156bd26daf74 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -207,7 +207,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
 {
 	struct rxrpc_connection *conn = call->conn;
 
-	call->peer->cong_cwnd = call->cong_cwnd;
+	call->peer->cong_ssthresh = call->cong_ssthresh;
 
 	if (!hlist_unhashed(&call->error_link)) {
 		spin_lock_bh(&call->peer->lock);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 5c17fed4b60f..bdf70b81addc 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -58,6 +58,25 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
 	summary->cumulative_acks = cumulative_acks;
 	summary->dup_acks = call->cong_dup_acks;
 
+	/* If we haven't transmitted anything for >1RTT, we should reset the
+	 * congestion management state.
+	 */
+	if ((call->cong_mode == RXRPC_CALL_SLOW_START ||
+	     call->cong_mode == RXRPC_CALL_CONGEST_AVOIDANCE) &&
+	    ktime_before(ktime_add_us(call->tx_last_sent,
+				      call->peer->srtt_us >> 3),
+			 ktime_get_real())
+	    ) {
+		change = rxrpc_cong_idle_reset;
+		summary->mode = RXRPC_CALL_SLOW_START;
+		if (RXRPC_TX_SMSS > 2190)
+			summary->cwnd = 2;
+		else if (RXRPC_TX_SMSS > 1095)
+			summary->cwnd = 3;
+		else
+			summary->cwnd = 4;
+	}
+
 	switch (call->cong_mode) {
 	case RXRPC_CALL_SLOW_START:
 		if (summary->saw_nacks)
@@ -205,7 +224,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
 
 	if (call->acks_lowest_nak == call->acks_hard_ack) {
 		call->acks_lowest_nak = to;
-	} else if (before_eq(call->acks_lowest_nak, to)) {
+	} else if (after(to, call->acks_lowest_nak)) {
 		summary->new_low_nack = true;
 		call->acks_lowest_nak = to;
 	}
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 2c3f7e4e30d7..46432e70a16b 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -501,6 +501,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
 
 done:
 	if (ret >= 0) {
+		call->tx_last_sent = txb->last_sent;
 		if (txb->wire.flags & RXRPC_REQUEST_ACK) {
 			call->peer->rtt_last_req = txb->last_sent;
 			if (call->peer->rtt_count > 1) {
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 26d2ae9baaf2..041a51225c5f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -227,12 +227,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
 
 		rxrpc_peer_init_rtt(peer);
 
-		if (RXRPC_TX_SMSS > 2190)
-			peer->cong_cwnd = 2;
-		else if (RXRPC_TX_SMSS > 1095)
-			peer->cong_cwnd = 3;
-		else
-			peer->cong_cwnd = 4;
+		peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
 		trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here);
 	}
 
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 0807753ec2dc..fae22a8b38d6 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -217,7 +217,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
 		seq_puts(seq,
 			 "Proto Local                                          "
 			 " Remote                                         "
-			 " Use  CW   MTU LastUse      RTT      RTO\n"
+			 " Use SST   MTU LastUse      RTT      RTO\n"
 			 );
 		return 0;
 	}
@@ -235,7 +235,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
 		   lbuff,
 		   rbuff,
 		   refcount_read(&peer->ref),
-		   peer->cong_cwnd,
+		   peer->cong_ssthresh,
 		   peer->mtu,
 		   now - peer->last_tx_at,
 		   peer->srtt_us >> 3,
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 9b567aff3e84..e5fd8a95bf71 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -22,11 +22,27 @@
  */
 static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
 {
-	unsigned int win_size =
-		min_t(unsigned int, call->tx_winsize,
-		      call->cong_cwnd + call->cong_extra);
+	unsigned int win_size;
 	rxrpc_seq_t tx_win = smp_load_acquire(&call->acks_hard_ack);
 
+	/* If we haven't transmitted anything for >1RTT, we should reset the
+	 * congestion management state.
+	 */
+	if (ktime_before(ktime_add_us(call->tx_last_sent,
+				      call->peer->srtt_us >> 3),
+			 ktime_get_real())) {
+		if (RXRPC_TX_SMSS > 2190)
+			win_size = 2;
+		else if (RXRPC_TX_SMSS > 1095)
+			win_size = 3;
+		else
+			win_size = 4;
+		win_size += call->cong_extra;
+	} else {
+		win_size = min_t(unsigned int, call->tx_winsize,
+				 call->cong_cwnd + call->cong_extra);
+	}
+
 	if (_tx_win)
 		*_tx_win = tx_win;
 	return call->tx_top - tx_win < win_size;



  parent reply	other threads:[~2022-11-08 22:24 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-08 22:17 [PATCH 00/26] rxrpc: Increasing SACK size and moving away from softirq, part 1 David Howells
2022-11-08 22:17 ` [PATCH net-next 01/26] net, proc: Provide PROC_FS=n fallback for proc_create_net_single_write() David Howells
2022-11-08 22:18 ` [PATCH net-next 02/26] rxrpc: Trace setting of the request-ack flag David Howells
2022-11-08 22:18 ` [PATCH net-next 03/26] rxrpc: Split call timer-expiration from call timer-set tracepoint David Howells
2022-11-08 22:18 ` [PATCH net-next 04/26] rxrpc: Track highest acked serial David Howells
2022-11-08 22:18 ` [PATCH net-next 05/26] rxrpc: Add stats procfile and DATA packet stats David Howells
2022-11-08 22:18 ` [PATCH net-next 06/26] rxrpc: Record statistics about ACK types David Howells
2022-11-08 22:18 ` [PATCH net-next 07/26] rxrpc: Record stats for why the REQUEST-ACK flag is being set David Howells
2022-11-08 22:18 ` [PATCH net-next 08/26] rxrpc: Fix ack.bufferSize to be 0 when generating an ack David Howells
2022-11-08 22:18 ` [PATCH net-next 09/26] net: Change the udp encap_err_rcv to allow use of {ip,ipv6}_icmp_error() David Howells
2022-11-08 22:18 ` [PATCH net-next 10/26] rxrpc: Use the core ICMP/ICMP6 parsers David Howells
2022-11-08 22:19 ` [PATCH net-next 11/26] rxrpc: Call udp_sendmsg() directly David Howells
2022-11-08 22:19 ` [PATCH net-next 12/26] rxrpc: Remove unnecessary header inclusions David Howells
2022-11-08 22:19 ` [PATCH net-next 13/26] rxrpc: Remove the flags from the rxrpc_skb tracepoint David Howells
2022-11-08 22:19 ` [PATCH net-next 14/26] rxrpc: Remove call->tx_phase David Howells
2022-11-08 22:19 ` [PATCH net-next 15/26] rxrpc: Define rxrpc_txbuf struct to carry data to be transmitted David Howells
2022-11-08 22:19 ` [PATCH net-next 16/26] rxrpc: Allocate ACK records at proposal and queue for transmission David Howells
2022-11-08 22:19 ` [PATCH net-next 17/26] rxrpc: Clean up ACK handling David Howells
2022-11-08 22:19 ` [PATCH net-next 18/26] rxrpc: Split the rxrpc_recvmsg tracepoint David Howells
2022-11-08 22:19 ` [PATCH net-next 19/26] rxrpc: Clone received jumbo subpackets and queue separately David Howells
2022-11-08 22:20 ` [PATCH net-next 20/26] rxrpc: Get rid of the Rx ring David Howells
2022-11-08 22:20 ` [PATCH net-next 21/26] rxrpc: Don't use a ring buffer for call Tx queue David Howells
2022-11-08 22:20 ` [PATCH net-next 22/26] rxrpc: Remove call->lock David Howells
2022-11-08 22:20 ` [PATCH net-next 23/26] rxrpc: Save last ACK's SACK table rather than marking txbufs David Howells
2022-11-08 22:20 ` [PATCH net-next 24/26] rxrpc: Remove the rxtx ring David Howells
2022-11-08 22:20 ` David Howells [this message]
2022-11-08 22:20 ` [PATCH net-next 26/26] rxrpc: Allocate an skcipher each time needed rather than reusing David Howells
2022-11-09 14:10 ` [PATCH 00/26] rxrpc: Increasing SACK size and moving away from softirq, part 1 patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=166794603342.2389296.12363215406403504827.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=linux-afs@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.