All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Neal Cardwell <ncardwell@google.com>,
	Yuchung Cheng <ycheng@google.com>,
	Eric Dumazet <edumazet@google.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.4 07/23] tcp: do not cancel delay-AcK on DCTCP special ACK
Date: Fri, 27 Jul 2018 12:09:08 +0200	[thread overview]
Message-ID: <20180727100845.455862916@linuxfoundation.org> (raw)
In-Reply-To: <20180727100845.054377089@linuxfoundation.org>

4.4-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Yuchung Cheng <ycheng@google.com>

[ Upstream commit 27cde44a259c380a3c09066fc4b42de7dde9b1ad ]

Currently when a DCTCP receiver delays an ACK and receive a
data packet with a different CE mark from the previous one's, it
sends two immediate ACKs acking previous and latest sequences
respectly (for ECN accounting).

Previously sending the first ACK may mark off the delayed ACK timer
(tcp_event_ack_sent). This may subsequently prevent sending the
second ACK to acknowledge the latest sequence (tcp_ack_snd_check).
The culprit is that tcp_send_ack() assumes it always acknowleges
the latest sequence, which is not true for the first special ACK.

The fix is to not make the assumption in tcp_send_ack and check the
actual ack sequence before cancelling the delayed ACK. Further it's
safer to pass the ack sequence number as a local variable into
tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid
future bugs like this.

Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h     |    1 +
 net/ipv4/tcp_dctcp.c  |   34 ++++------------------------------
 net/ipv4/tcp_output.c |   11 ++++++++---
 3 files changed, 13 insertions(+), 33 deletions(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -559,6 +559,7 @@ void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
 void tcp_push_one(struct sock *, unsigned int mss_now);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -135,21 +135,8 @@ static void dctcp_ce_state_0_to_1(struct
 	 * ACK has not sent yet.
 	 */
 	if (!ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=0. */
-		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
-	}
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+		__tcp_send_ack(sk, ca->prior_rcv_nxt);
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 1;
@@ -166,21 +153,8 @@ static void dctcp_ce_state_1_to_0(struct
 	 * ACK has not sent yet.
 	 */
 	if (ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=1. */
-		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
-	}
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+		__tcp_send_ack(sk, ca->prior_rcv_nxt);
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 0;
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -177,8 +177,13 @@ static void tcp_event_data_sent(struct t
 }
 
 /* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+				      u32 rcv_nxt)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (unlikely(rcv_nxt != tp->rcv_nxt))
+		return;  /* Special ACK sent by DCTCP to reflect ECN */
 	tcp_dec_quickack_mode(sk, pkts);
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
@@ -1005,7 +1010,7 @@ static int __tcp_transmit_skb(struct soc
 	icsk->icsk_af_ops->send_check(sk, skb);
 
 	if (likely(tcb->tcp_flags & TCPHDR_ACK))
-		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
 
 	if (skb->len != tcp_header_size)
 		tcp_event_data_sent(tp, sk);
@@ -3400,12 +3405,12 @@ void __tcp_send_ack(struct sock *sk, u32
 	skb_mstamp_get(&buff->skb_mstamp);
 	__tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt);
 }
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
 
 void tcp_send_ack(struct sock *sk)
 {
 	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
 }
-EXPORT_SYMBOL_GPL(tcp_send_ack);
 
 /* This routine sends a packet with an out of date sequence
  * number. It assumes the other end will try to ack it.



  parent reply	other threads:[~2018-07-27 10:12 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-27 10:09 [PATCH 4.4 00/23] 4.4.145-stable review Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 01/23] MIPS: ath79: fix register address in ath79_ddr_wb_flush() Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 02/23] ip: hash fragments consistently Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 03/23] net/mlx4_core: Save the qpn from the input modifier in RST2INIT wrapper Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 04/23] rtnetlink: add rtnl_link_state check in rtnl_configure_link Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 05/23] tcp: fix dctcp delayed ACK schedule Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 06/23] tcp: helpers to send special DCTCP ack Greg Kroah-Hartman
2018-07-27 10:09 ` Greg Kroah-Hartman [this message]
2018-07-27 10:09 ` [PATCH 4.4 08/23] tcp: do not delay ACK in DCTCP upon CE status change Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 09/23] tcp: avoid collapses in tcp_prune_queue() if possible Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 10/23] tcp: detect malicious patterns in tcp_collapse_ofo_queue() Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 11/23] ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 12/23] usb: cdc_acm: Add quirk for Castles VEGA3000 Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 13/23] usb: core: handle hub C_PORT_OVER_CURRENT condition Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 14/23] usb: gadget: f_fs: Only return delayed status when len is 0 Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 15/23] driver core: Partially revert "driver core: correct devices shutdown order" Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 16/23] can: xilinx_can: fix RX loop if RXNEMP is asserted without RXOK Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 17/23] can: xilinx_can: fix recovery from error states not being propagated Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 18/23] can: xilinx_can: fix device dropping off bus on RX overrun Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 19/23] can: xilinx_can: keep only 1-2 frames in TX FIFO to fix TX accounting Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 20/23] can: xilinx_can: fix incorrect clear of non-processed interrupts Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 21/23] can: xilinx_can: fix RX overflow interrupt not being enabled Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 22/23] turn off -Wattribute-alias Greg Kroah-Hartman
2018-07-27 10:09 ` [PATCH 4.4 23/23] ARM: fix put_user() for gcc-8 Greg Kroah-Hartman
2018-07-27 12:22 ` [PATCH 4.4 00/23] 4.4.145-stable review Nathan Chancellor
2018-07-27 17:28 ` Guenter Roeck
2018-07-27 20:06 ` Shuah Khan
2018-07-28  6:55 ` Naresh Kamboju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180727100845.455862916@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ncardwell@google.com \
    --cc=stable@vger.kernel.org \
    --cc=ycheng@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.