Netdev Archive on lore.kernel.org
 help / color / Atom feed
From: Eric Dumazet <edumazet@google.com>
To: "David S . Miller" <davem@davemloft.net>
Cc: netdev <netdev@vger.kernel.org>,
	Eric Dumazet <edumazet@google.com>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Jonathan Looney <jtl@netflix.com>,
	Neal Cardwell <ncardwell@google.com>,
	Tyler Hicks <tyhicks@canonical.com>,
	Yuchung Cheng <ycheng@google.com>,
	Bruce Curtis <brucec@netflix.com>,
	Jonathan Lemon <jonathan.lemon@gmail.com>
Subject: [PATCH net 1/4] tcp: limit payload size of sacked skbs
Date: Mon, 17 Jun 2019 10:03:51 -0700
Message-ID: <20190617170354.37770-2-edumazet@google.com> (raw)
In-Reply-To: <20190617170354.37770-1-edumazet@google.com>

Jonathan Looney reported that TCP can trigger the following crash
in tcp_shifted_skb() :

	BUG_ON(tcp_skb_pcount(skb) < pcount);

This can happen if the remote peer has advertized the smallest
MSS that linux TCP accepts : 48

An skb can hold 17 fragments, and each fragment can hold 32KB
on x86, or 64KB on PowerPC.

This means that the 16bit witdh of TCP_SKB_CB(skb)->tcp_gso_segs
can overflow.

Note that tcp_sendmsg() builds skbs with less than 64KB
of payload, so this problem needs SACK to be enabled.
SACK blocks allow TCP to coalesce multiple skbs in the retransmit
queue, thus filling the 17 fragments to maximal capacity.

CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs

Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jonathan Looney <jtl@netflix.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Bruce Curtis <brucec@netflix.com>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/linux/tcp.h   |  4 ++++
 include/net/tcp.h     |  2 ++
 net/ipv4/tcp.c        |  1 +
 net/ipv4/tcp_input.c  | 26 ++++++++++++++++++++------
 net/ipv4/tcp_output.c |  6 +++---
 5 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 711361af9ce019f08c8b6accc33220b673b34d56..9a478a0cd3a20b40ed344f178e35228a0b8ee203 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -484,4 +484,8 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
 
 	return (user_mss && user_mss < mss) ? user_mss : mss;
 }
+
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+		  int shiftlen);
+
 #endif	/* _LINUX_TCP_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ac2f53fbfa6b4cbf1fc615c952a5e1cac1124300..582c0caa98116740b5bde8c5dbb5d94fc69d1caa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -51,6 +51,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
 #define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS		48
+#define TCP_MIN_GSO_SIZE	(TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
 
 /*
  * Never offer a window over 32767 without using window scaling. Some
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f448a288d158c4baa6d8d5ed82c4b129404233a1..7dc9ab84bb69aa90953e98f9763287fcee3a1659 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3873,6 +3873,7 @@ void __init tcp_init(void)
 	unsigned long limit;
 	unsigned int i;
 
+	BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
 		     FIELD_SIZEOF(struct sk_buff, cb));
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38dfc308c0fb9832facadb0aeec8f3e4931901f4..d95ee40df6c2b020d590018bc41833b8a6aefa4a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1302,7 +1302,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
 	TCP_SKB_CB(skb)->seq += shifted;
 
 	tcp_skb_pcount_add(prev, pcount);
-	BUG_ON(tcp_skb_pcount(skb) < pcount);
+	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
 	tcp_skb_pcount_add(skb, -pcount);
 
 	/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1368,6 +1368,21 @@ static int skb_can_shift(const struct sk_buff *skb)
 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+		  int pcount, int shiftlen)
+{
+	/* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+	 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+	 * to make sure not storing more than 65535 * 8 bytes per skb,
+	 * even if current MSS is bigger.
+	 */
+	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+		return 0;
+	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+		return 0;
+	return skb_shift(to, from, shiftlen);
+}
+
 /* Try collapsing SACK blocks spanning across multiple skbs to a single
  * skb.
  */
@@ -1473,7 +1488,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
 		goto fallback;
 
-	if (!skb_shift(prev, skb, len))
+	if (!tcp_skb_shift(prev, skb, pcount, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
 		goto out;
@@ -1491,11 +1506,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		goto out;
 
 	len = skb->len;
-	if (skb_shift(prev, skb, len)) {
-		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+	pcount = tcp_skb_pcount(skb);
+	if (tcp_skb_shift(prev, skb, pcount, len))
+		tcp_shifted_skb(sk, prev, skb, state, pcount,
 				len, mss, 0);
-	}
 
 out:
 	return prev;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f429e856e2631a9e6de1d2e060406742f97e538e..b8e3bbb852117459d131fbb41d69ae63bd251a3e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1454,8 +1454,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
 	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/* Then reserve room for full set of TCP options and 8 bytes of data */
-	if (mss_now < 48)
-		mss_now = 48;
+	if (mss_now < TCP_MIN_SND_MSS)
+		mss_now = TCP_MIN_SND_MSS;
 	return mss_now;
 }
 
@@ -2747,7 +2747,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 		if (next_skb_size <= skb_availroom(skb))
 			skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
 				      next_skb_size);
-		else if (!skb_shift(skb, next_skb, next_skb_size))
+		else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
 			return false;
 	}
 	tcp_highest_sack_replace(sk, next_skb, skb);
-- 
2.22.0.410.gd8fdbe21b5-goog


  reply index

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-17 17:03 [PATCH net 0/4] tcp: make sack processing more robust Eric Dumazet
2019-06-17 17:03 ` Eric Dumazet [this message]
2019-06-17 17:14   ` [PATCH net 1/4] tcp: limit payload size of sacked skbs Jonathan Lemon
2019-06-17 17:03 ` [PATCH net 2/4] tcp: tcp_fragment() should apply sane memory limits Eric Dumazet
2019-06-17 17:14   ` Jonathan Lemon
2019-06-18  0:18   ` Christoph Paasch
2019-06-18  2:28     ` Eric Dumazet
2019-06-18  3:19       ` Christoph Paasch
2019-06-18  3:44         ` Eric Dumazet
2019-06-18  3:53           ` Christoph Paasch
2019-06-18  4:08             ` Eric Dumazet
2019-07-10 18:23         ` Prout, Andrew - LLSC - MITLL
2019-07-10 18:28           ` Eric Dumazet
2019-07-10 18:53             ` Prout, Andrew - LLSC - MITLL
2019-07-10 19:26               ` Eric Dumazet
2019-07-11  7:28                 ` Christoph Paasch
2019-07-11  9:19                   ` Eric Dumazet
2019-07-11 18:26                     ` Michal Kubecek
2019-07-11 18:50                       ` Eric Dumazet
2019-07-11 10:18                   ` Eric Dumazet
2019-07-11 17:14                 ` Prout, Andrew - LLSC - MITLL
2019-07-11 18:28                   ` Eric Dumazet
2019-07-11 19:04                     ` Jonathan Lemon
2019-07-12  7:05                       ` Eric Dumazet
2019-07-16 15:13                   ` Prout, Andrew - LLSC - MITLL
2019-06-17 17:03 ` [PATCH net 3/4] tcp: add tcp_min_snd_mss sysctl Eric Dumazet
2019-06-17 17:15   ` Jonathan Lemon
2019-06-17 17:18   ` Tyler Hicks
2019-06-17 17:03 ` [PATCH net 4/4] tcp: enforce tcp_min_snd_mss in tcp_mtu_probing() Eric Dumazet
2019-06-17 17:16   ` Jonathan Lemon
2019-06-17 17:18   ` Tyler Hicks
2019-06-17 17:41 ` [PATCH net 0/4] tcp: make sack processing more robust David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190617170354.37770-2-edumazet@google.com \
    --to=edumazet@google.com \
    --cc=brucec@netflix.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=jonathan.lemon@gmail.com \
    --cc=jtl@netflix.com \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=tyhicks@canonical.com \
    --cc=ycheng@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org
	public-inbox-index netdev

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git