All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
@ 2019-06-25 17:19 Josh Hunt
  2019-06-25 20:26 ` Sasha Levin
  0 siblings, 1 reply; 11+ messages in thread
From: Josh Hunt @ 2019-06-25 17:19 UTC (permalink / raw)
  To: gregkh, edumazet; +Cc: stable, jbaron, Josh Hunt

Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:

tcp_fragment() might be called for skbs in the write queue.

Memory limits might have been exceeded because tcp_sendmsg() only
checks limits at full skb (64KB) boundaries.

Therefore, we need to make sure tcp_fragment() won't punish applications
that might have set up very low SO_SNDBUF values.

Backport notes:
Initial version used tcp_queue type which is not present in older kernels,
so added a new arg to tcp_fragment() to determine whether this is a
retransmit or not.

Fixes: 9daf226ff926 ("tcp: tcp_fragment() should apply sane memory limits")
Signed-off-by: Josh Hunt <johunt@akamai.com>
Reviewed-by: Jason Baron <jbaron@akamai.com>
---

Eric/Greg - This applies on top of v4.14.130. I did not see anything come
through for the older (<4.19) stable kernels yet. Without this change
Christoph Paasch's packetdrill script (https://lore.kernel.org/netdev/CALMXkpYVRxgeqarp4gnmX7GqYh1sWOAt6UaRFqYBOaaNFfZ5sw@mail.gmail.com/)
will fail on 4.14 stable kernels, but passes with this change.

 include/net/tcp.h     |  3 ++-
 net/ipv4/tcp_input.c  |  4 ++--
 net/ipv4/tcp_output.c | 16 ++++++++--------
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1179ef4f0768..9d69fefa365c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -554,7 +554,8 @@ void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
 void tcp_enter_recovery(struct sock *sk, bool ece_ack);
 int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
+int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t,
+		 bool retrans);
 
 void tcp_send_probe0(struct sock *);
 void tcp_send_partial(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e080f3b75bd..0fd629587104 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1202,7 +1202,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 		if (pkt_len >= skb->len && !in_sack)
 			return 0;
 
-		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC, true);
 		if (err < 0)
 			return err;
 	}
@@ -2266,7 +2266,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			/* If needed, chop off the prefix to mark as lost. */
 			lost = (packets - oldcnt) * mss;
 			if (lost < skb->len &&
-			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC, true) < 0)
 				break;
 			cnt = packets;
 		}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a8772e11dc1c..ca14770dd7ba 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1259,7 +1259,7 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
  * Remember, these are still headerless SKBs at this point.
  */
 int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
-		 unsigned int mss_now, gfp_t gfp)
+		 unsigned int mss_now, gfp_t gfp, bool retrans)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
@@ -1274,7 +1274,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf && retrans)) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
 		return -ENOMEM;
 	}
@@ -1834,7 +1834,7 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
  * packet has never been sent out before (and thus is not cloned).
  */
 static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
-			unsigned int mss_now, gfp_t gfp)
+			unsigned int mss_now, gfp_t gfp, bool retrans)
 {
 	struct sk_buff *buff;
 	int nlen = skb->len - len;
@@ -1842,7 +1842,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* All of a TSO frame must be composed of paged data.  */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, skb, len, mss_now, gfp);
+		return tcp_fragment(sk, skb, len, mss_now, gfp, retrans);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
@@ -2361,7 +2361,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						    nonagle);
 
 		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp, false)))
 			break;
 
 		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2514,7 +2514,7 @@ void tcp_send_loss_probe(struct sock *sk)
 
 	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
 		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
-					  GFP_ATOMIC)))
+					  GFP_ATOMIC, true)))
 			goto rearm_timer;
 		skb = tcp_write_queue_next(sk, skb);
 	}
@@ -2874,7 +2874,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 
 	len = cur_mss * segs;
 	if (skb->len > len) {
-		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC, true))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
 		if (skb_unclone(skb, GFP_ATOMIC))
@@ -3696,7 +3696,7 @@ int tcp_write_wakeup(struct sock *sk, int mib)
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC, false))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(skb, mss);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-25 17:19 [PATCH 4.14] tcp: refine memory limit test in tcp_fragment() Josh Hunt
@ 2019-06-25 20:26 ` Sasha Levin
  2019-06-25 20:29   ` Josh Hunt
  0 siblings, 1 reply; 11+ messages in thread
From: Sasha Levin @ 2019-06-25 20:26 UTC (permalink / raw)
  To: Josh Hunt; +Cc: gregkh, edumazet, stable, jbaron

On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
>Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:

You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.

>tcp_fragment() might be called for skbs in the write queue.
>
>Memory limits might have been exceeded because tcp_sendmsg() only
>checks limits at full skb (64KB) boundaries.
>
>Therefore, we need to make sure tcp_fragment() wont punish applications
>that might have setup very low SO_SNDBUF values.
>
>Backport notes:
>Initial version used tcp_queue type which is not present in older kernels,
>so added a new arg to tcp_fragment() to determine whether this is a
>retransmit or not.
>
>Fixes: 9daf226ff926 ("tcp: tcp_fragment() should apply sane memory limits")
>Signed-off-by: Josh Hunt <johunt@akamai.com>
>Reviewed-by: Jason Baron <jbaron@akamai.com>
>---
>
>Eric/Greg - This applies on top of v4.14.130. I did not see anything come
>through for the older (<4.19) stable kernels yet. Without this change
>Christoph Paasch's packetrill script (https://lore.kernel.org/netdev/CALMXkpYVRxgeqarp4gnmX7GqYh1sWOAt6UaRFqYBOaaNFfZ5sw@mail.gmail.com/)
>will fail on 4.14 stable kernels, but passes with this change.

Eric, it would be great if you could Ack this, it's very different from
your original patch.

--
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-25 20:26 ` Sasha Levin
@ 2019-06-25 20:29   ` Josh Hunt
  2019-06-25 22:18     ` Greg KH
  2019-06-25 22:40     ` Sasha Levin
  0 siblings, 2 replies; 11+ messages in thread
From: Josh Hunt @ 2019-06-25 20:29 UTC (permalink / raw)
  To: Sasha Levin; +Cc: gregkh, edumazet, stable, jbaron

On 6/25/19 1:26 PM, Sasha Levin wrote:
> On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
>> Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:
> 
> You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.

I wasn't sure if I should reference the upstream commit or the stable
commit. dad3a9314 is the version of the commit from linux-4.14.y. There
may be a similar issue with the Fixes tag below, since that also
references the 4.14 version of the change.

> 
>> tcp_fragment() might be called for skbs in the write queue.
>>
>> Memory limits might have been exceeded because tcp_sendmsg() only
>> checks limits at full skb (64KB) boundaries.
>>
>> Therefore, we need to make sure tcp_fragment() wont punish applications
>> that might have setup very low SO_SNDBUF values.
>>
>> Backport notes:
>> Initial version used tcp_queue type which is not present in older 
>> kernels,
>> so added a new arg to tcp_fragment() to determine whether this is a
>> retransmit or not.
>>
>> Fixes: 9daf226ff926 ("tcp: tcp_fragment() should apply sane memory 
>> limits")
>> Signed-off-by: Josh Hunt <johunt@akamai.com>
>> Reviewed-by: Jason Baron <jbaron@akamai.com>
>> ---
>>
>> Eric/Greg - This applies on top of v4.14.130. I did not see anything come
>> through for the older (<4.19) stable kernels yet. Without this change
>> Christoph Paasch's packetrill script 
>> (https://lore.kernel.org/netdev/CALMXkpYVRxgeqarp4gnmX7GqYh1sWOAt6UaRFqYBOaaNFfZ5sw@mail.gmail.com/) 
>>
>> will fail on 4.14 stable kernels, but passes with this change.
> 
> Eric, it would be great if you could Ack this, it's very different from
> your original patch.

Yes, that would be great.

Josh

> 
> -- 
> Thanks,
> Sasha

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-25 20:29   ` Josh Hunt
@ 2019-06-25 22:18     ` Greg KH
  2019-06-25 22:49       ` Josh Hunt
  2019-06-25 22:40     ` Sasha Levin
  1 sibling, 1 reply; 11+ messages in thread
From: Greg KH @ 2019-06-25 22:18 UTC (permalink / raw)
  To: Josh Hunt; +Cc: Sasha Levin, edumazet, stable, jbaron

On Tue, Jun 25, 2019 at 01:29:35PM -0700, Josh Hunt wrote:
> On 6/25/19 1:26 PM, Sasha Levin wrote:
> > On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
> > > Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:
> > 
> > You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.
> 
> I wasn't sure if I should reference the upstream commit or stable commit.

The upstream commit please.

> dad3a9314 is the version of the commit from linux-4.14.y. There may be a
> similar issue with the Fixes tag below since that also references the 4.14
> vers of the change.
> 
> > 
> > > tcp_fragment() might be called for skbs in the write queue.
> > > 
> > > Memory limits might have been exceeded because tcp_sendmsg() only
> > > checks limits at full skb (64KB) boundaries.
> > > 
> > > Therefore, we need to make sure tcp_fragment() wont punish applications
> > > that might have setup very low SO_SNDBUF values.
> > > 
> > > Backport notes:
> > > Initial version used tcp_queue type which is not present in older
> > > kernels,
> > > so added a new arg to tcp_fragment() to determine whether this is a
> > > retransmit or not.
> > > 
> > > Fixes: 9daf226ff926 ("tcp: tcp_fragment() should apply sane memory
> > > limits")
> > > Signed-off-by: Josh Hunt <johunt@akamai.com>
> > > Reviewed-by: Jason Baron <jbaron@akamai.com>
> > > ---
> > > 
> > > Eric/Greg - This applies on top of v4.14.130. I did not see anything come
> > > through for the older (<4.19) stable kernels yet. Without this change
> > > Christoph Paasch's packetrill script (https://lore.kernel.org/netdev/CALMXkpYVRxgeqarp4gnmX7GqYh1sWOAt6UaRFqYBOaaNFfZ5sw@mail.gmail.com/)
> > > 
> > > will fail on 4.14 stable kernels, but passes with this change.
> > 
> > Eric, it would be great if you could Ack this, it's very different from
> > your original patch.
> 
> Yes, that would be great.

I would prefer if this looks a bit more like the upstream fix, perhaps a
backport of the function that added the "direction" of the packet first,
and then Eric's patch?  As it is, this patch adds a different parameter
to the function than what is in Linus's tree, and I bet will cause
problems at some later point in time.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-25 20:29   ` Josh Hunt
  2019-06-25 22:18     ` Greg KH
@ 2019-06-25 22:40     ` Sasha Levin
  2019-06-25 22:53       ` Josh Hunt
  2019-06-26  0:50       ` Greg KH
  1 sibling, 2 replies; 11+ messages in thread
From: Sasha Levin @ 2019-06-25 22:40 UTC (permalink / raw)
  To: Josh Hunt; +Cc: gregkh, edumazet, stable, jbaron

On Tue, Jun 25, 2019 at 01:29:35PM -0700, Josh Hunt wrote:
>On 6/25/19 1:26 PM, Sasha Levin wrote:
>>On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
>>>Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:
>>
>>You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.
>
>I wasn't sure if I should reference the upstream commit or stable 
>commit. dad3a9314 is the version of the commit from linux-4.14.y. 
>There may be a similar issue with the Fixes tag below since that also 
>references the 4.14 vers of the change.

We try to just reference upstream commits when possible. I can edit
these if this patch will be merged.

--
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-25 22:18     ` Greg KH
@ 2019-06-25 22:49       ` Josh Hunt
  2019-06-26  0:48         ` Greg KH
  0 siblings, 1 reply; 11+ messages in thread
From: Josh Hunt @ 2019-06-25 22:49 UTC (permalink / raw)
  To: Greg KH; +Cc: Sasha Levin, edumazet, stable, jbaron

On 6/25/19 3:18 PM, Greg KH wrote:
> On Tue, Jun 25, 2019 at 01:29:35PM -0700, Josh Hunt wrote:
>> On 6/25/19 1:26 PM, Sasha Levin wrote:
>>> On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
>>>> Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:
>>>
>>> You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.
>>
>> I wasn't sure if I should reference the upstream commit or stable commit.
> 
> The upstream commit please.

Thanks. I'll fix for next version.

> 
>> dad3a9314 is the version of the commit from linux-4.14.y. There may be a
>> similar issue with the Fixes tag below since that also references the 4.14
>> vers of the change.
>>
>>>
>>>> tcp_fragment() might be called for skbs in the write queue.
>>>>
>>>> Memory limits might have been exceeded because tcp_sendmsg() only
>>>> checks limits at full skb (64KB) boundaries.
>>>>
>>>> Therefore, we need to make sure tcp_fragment() wont punish applications
>>>> that might have setup very low SO_SNDBUF values.
>>>>
>>>> Backport notes:
>>>> Initial version used tcp_queue type which is not present in older
>>>> kernels,
>>>> so added a new arg to tcp_fragment() to determine whether this is a
>>>> retransmit or not.
>>>>
>>>> Fixes: 9daf226ff926 ("tcp: tcp_fragment() should apply sane memory
>>>> limits")
>>>> Signed-off-by: Josh Hunt <johunt@akamai.com>
>>>> Reviewed-by: Jason Baron <jbaron@akamai.com>
>>>> ---
>>>>
>>>> Eric/Greg - This applies on top of v4.14.130. I did not see anything come
>>>> through for the older (<4.19) stable kernels yet. Without this change
>>>> Christoph Paasch's packetrill script (https://lore.kernel.org/netdev/CALMXkpYVRxgeqarp4gnmX7GqYh1sWOAt6UaRFqYBOaaNFfZ5sw@mail.gmail.com/)
>>>>
>>>> will fail on 4.14 stable kernels, but passes with this change.
>>>
>>> Eric, it would be great if you could Ack this, it's very different from
>>> your original patch.
>>
>> Yes, that would be great.
> 
> I would prefer if this looks a bit more like the upstream fix, perhaps a
> backport of the function that added the "direction" of the packet first,
> and then Eric's patch?  As it is, this patch adds a different parameter
> to the function than what is in Linus's tree, and I bet will cause
> problems at some later point in time.

The commit which introduced the fn arguments is part of a much larger 
change that created a separate rb-tree for the retransmit queue:

commit 75c119afe14f74b4dd967d75ed9f57ab6c0ef045
Author: Eric Dumazet <edumazet@google.com>
Date:   Thu Oct 5 22:21:27 2017 -0700

     tcp: implement rb-tree based retransmit queue

I can backport the portion of this change which basically does this:

+enum tcp_queue {
+       TCP_FRAG_IN_WRITE_QUEUE,
+       TCP_FRAG_IN_RTX_QUEUE,
+};
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+                struct sk_buff *skb, u32 len,
+                unsigned int mss_now, gfp_t gfp);

and the corresponding call-sites of tcp_fragment(). If we do that then 
Eric's fix (b6653b3629e5b88202be3c9abc44713973f5c4b4) should apply 
cleanly on top of linux-4.14.y. I'm happy to do that if you'd rather go 
that route. If you want the full rb-tree change in 4.14 then I would
defer that to Eric, but I would argue that it is IMHO probably too
invasive a change for an LTS kernel.

Thanks
Josh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-25 22:40     ` Sasha Levin
@ 2019-06-25 22:53       ` Josh Hunt
  2019-06-26  0:50       ` Greg KH
  1 sibling, 0 replies; 11+ messages in thread
From: Josh Hunt @ 2019-06-25 22:53 UTC (permalink / raw)
  To: Sasha Levin; +Cc: gregkh, edumazet, stable, jbaron

On 6/25/19 3:40 PM, Sasha Levin wrote:
> On Tue, Jun 25, 2019 at 01:29:35PM -0700, Josh Hunt wrote:
>> On 6/25/19 1:26 PM, Sasha Levin wrote:
>>> On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
>>>> Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:
>>>
>>> You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.
>>
>> I wasn't sure if I should reference the upstream commit or stable 
>> commit. dad3a9314 is the version of the commit from linux-4.14.y. 
>> There may be a similar issue with the Fixes tag below since that also 
>> references the 4.14 vers of the change.
> 
> We try to just reference upstream commits when possible. I can edit
> these if this patch will be merged.

Thanks for the help. I'll remember for next time :)

Josh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-25 22:49       ` Josh Hunt
@ 2019-06-26  0:48         ` Greg KH
  2019-06-26  8:29           ` Greg KH
  0 siblings, 1 reply; 11+ messages in thread
From: Greg KH @ 2019-06-26  0:48 UTC (permalink / raw)
  To: Josh Hunt; +Cc: Sasha Levin, edumazet, stable, jbaron

On Tue, Jun 25, 2019 at 03:49:33PM -0700, Josh Hunt wrote:
> On 6/25/19 3:18 PM, Greg KH wrote:
> > On Tue, Jun 25, 2019 at 01:29:35PM -0700, Josh Hunt wrote:
> > > On 6/25/19 1:26 PM, Sasha Levin wrote:
> > > > On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
> > > > > Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:
> > > > 
> > > > You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.
> > > 
> > > I wasn't sure if I should reference the upstream commit or stable commit.
> > 
> > The upstream commit please.
> 
> Thanks. I'll fix for next version.
> 
> > 
> > > dad3a9314 is the version of the commit from linux-4.14.y. There may be a
> > > similar issue with the Fixes tag below since that also references the 4.14
> > > vers of the change.
> > > 
> > > > 
> > > > > tcp_fragment() might be called for skbs in the write queue.
> > > > > 
> > > > > Memory limits might have been exceeded because tcp_sendmsg() only
> > > > > checks limits at full skb (64KB) boundaries.
> > > > > 
> > > > > Therefore, we need to make sure tcp_fragment() wont punish applications
> > > > > that might have setup very low SO_SNDBUF values.
> > > > > 
> > > > > Backport notes:
> > > > > Initial version used tcp_queue type which is not present in older
> > > > > kernels,
> > > > > so added a new arg to tcp_fragment() to determine whether this is a
> > > > > retransmit or not.
> > > > > 
> > > > > Fixes: 9daf226ff926 ("tcp: tcp_fragment() should apply sane memory
> > > > > limits")
> > > > > Signed-off-by: Josh Hunt <johunt@akamai.com>
> > > > > Reviewed-by: Jason Baron <jbaron@akamai.com>
> > > > > ---
> > > > > 
> > > > > Eric/Greg - This applies on top of v4.14.130. I did not see anything come
> > > > > through for the older (<4.19) stable kernels yet. Without this change
> > > > > Christoph Paasch's packetrill script (https://lore.kernel.org/netdev/CALMXkpYVRxgeqarp4gnmX7GqYh1sWOAt6UaRFqYBOaaNFfZ5sw@mail.gmail.com/)
> > > > > 
> > > > > will fail on 4.14 stable kernels, but passes with this change.
> > > > 
> > > > Eric, it would be great if you could Ack this, it's very different from
> > > > your original patch.
> > > 
> > > Yes, that would be great.
> > 
> > I would prefer if this looks a bit more like the upstream fix, perhaps a
> > backport of the function that added the "direction" of the packet first,
> > and then Eric's patch?  As it is, this patch adds a different parameter
> > to the function than what is in Linus's tree, and I bet will cause
> > problems at some later point in time.
> 
> The commit which introduced the fn arguments is part of a much larger change
> that created a separate rb-tree for the retransmit queue:
> 
> commit 75c119afe14f74b4dd967d75ed9f57ab6c0ef045
> Author: Eric Dumazet <edumazet@google.com>
> Date:   Thu Oct 5 22:21:27 2017 -0700
> 
>     tcp: implement rb-tree based retransmit queue
> 
> I can backport the portion of this change which basically does this:
> 
> +enum tcp_queue {
> +       TCP_FRAG_IN_WRITE_QUEUE,
> +       TCP_FRAG_IN_RTX_QUEUE,
> +};
> +int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
> +                struct sk_buff *skb, u32 len,
> +                unsigned int mss_now, gfp_t gfp);
> 
> and the corresponding call-sites of tcp_fragment(). If we do that then
> Eric's fix (b6653b3629e5b88202be3c9abc44713973f5c4b4) should apply cleanly
> on top of linux-4.14.y. I'm happy to do that if you'd rather go that route.

Yes, that is what I was thinking of, thanks.  You expressed it much
better than I could have before my morning coffee :)

> If you want the full rb-tree change into 4.14 then I would defer that to
> Eric, but would argue that IMHO is probably too invasive of a change for a
> LTS kernel.

No, I don't think we should do that work for 4.14.

Also, your change would be suitable for backporting to the older stable
kernels that also need this (4.9.y and 4.4.y.)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-25 22:40     ` Sasha Levin
  2019-06-25 22:53       ` Josh Hunt
@ 2019-06-26  0:50       ` Greg KH
  1 sibling, 0 replies; 11+ messages in thread
From: Greg KH @ 2019-06-26  0:50 UTC (permalink / raw)
  To: Sasha Levin; +Cc: Josh Hunt, edumazet, stable, jbaron

On Tue, Jun 25, 2019 at 06:40:50PM -0400, Sasha Levin wrote:
> On Tue, Jun 25, 2019 at 01:29:35PM -0700, Josh Hunt wrote:
> > On 6/25/19 1:26 PM, Sasha Levin wrote:
> > > On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
> > > > Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:
> > > 
> > > You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.
> > 
> > I wasn't sure if I should reference the upstream commit or stable
> > commit. dad3a9314 is the version of the commit from linux-4.14.y. There
> > may be a similar issue with the Fixes tag below since that also
> > references the 4.14 vers of the change.
> 
> We try to just reference upstream commits when possible. I can edit
> these if this patch will be merged.

I think for this issue we will do as was mentioned in other responses in
this thread.

But, to the topic of original sha1 ids, I think I need to document how
we have been using these in a much better way.  It's a very powerful way
to determine what is fixed where and allows people to properly audit
fixes and how they propagate around different kernel trees, distros
included.

I've been talking to other projects and they like how we have been doing
this and want to copy it, so I guess it's time to actually describe what
we do here :)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-26  0:48         ` Greg KH
@ 2019-06-26  8:29           ` Greg KH
  2019-06-26 16:28             ` Josh Hunt
  0 siblings, 1 reply; 11+ messages in thread
From: Greg KH @ 2019-06-26  8:29 UTC (permalink / raw)
  To: Josh Hunt; +Cc: Sasha Levin, edumazet, stable, jbaron

On Wed, Jun 26, 2019 at 08:48:46AM +0800, Greg KH wrote:
> On Tue, Jun 25, 2019 at 03:49:33PM -0700, Josh Hunt wrote:
> > On 6/25/19 3:18 PM, Greg KH wrote:
> > > On Tue, Jun 25, 2019 at 01:29:35PM -0700, Josh Hunt wrote:
> > > > On 6/25/19 1:26 PM, Sasha Levin wrote:
> > > > > On Tue, Jun 25, 2019 at 01:19:37PM -0400, Josh Hunt wrote:
> > > > > > Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:
> > > > > 
> > > > > You probably meant b6653b3629e5b88202be3c9abc44713973f5c4b4 here.
> > > > 
> > > > I wasn't sure if I should reference the upstream commit or stable commit.
> > > 
> > > The upstream commit please.
> > 
> > Thanks. I'll fix for next version.
> > 
> > > 
> > > > dad3a9314 is the version of the commit from linux-4.14.y. There may be a
> > > > similar issue with the Fixes tag below since that also references the 4.14
> > > > vers of the change.
> > > > 
> > > > > 
> > > > > > tcp_fragment() might be called for skbs in the write queue.
> > > > > > 
> > > > > > Memory limits might have been exceeded because tcp_sendmsg() only
> > > > > > checks limits at full skb (64KB) boundaries.
> > > > > > 
> > > > > > Therefore, we need to make sure tcp_fragment() wont punish applications
> > > > > > that might have setup very low SO_SNDBUF values.
> > > > > > 
> > > > > > Backport notes:
> > > > > > Initial version used tcp_queue type which is not present in older
> > > > > > kernels,
> > > > > > so added a new arg to tcp_fragment() to determine whether this is a
> > > > > > retransmit or not.
> > > > > > 
> > > > > > Fixes: 9daf226ff926 ("tcp: tcp_fragment() should apply sane memory
> > > > > > limits")
> > > > > > Signed-off-by: Josh Hunt <johunt@akamai.com>
> > > > > > Reviewed-by: Jason Baron <jbaron@akamai.com>
> > > > > > ---
> > > > > > 
> > > > > > Eric/Greg - This applies on top of v4.14.130. I did not see anything come
> > > > > > through for the older (<4.19) stable kernels yet. Without this change
> > > > > > Christoph Paasch's packetrill script (https://lore.kernel.org/netdev/CALMXkpYVRxgeqarp4gnmX7GqYh1sWOAt6UaRFqYBOaaNFfZ5sw@mail.gmail.com/)
> > > > > > 
> > > > > > will fail on 4.14 stable kernels, but passes with this change.
> > > > > 
> > > > > Eric, it would be great if you could Ack this, it's very different from
> > > > > your original patch.
> > > > 
> > > > Yes, that would be great.
> > > 
> > > I would prefer if this looks a bit more like the upstream fix, perhaps a
> > > backport of the function that added the "direction" of the packet first,
> > > and then Eric's patch?  As it is, this patch adds a different parameter
> > > to the function than what is in Linus's tree, and I bet will cause
> > > problems at some later point in time.
> > 
> > The commit which introduced the fn arguments is part of a much larger change
> > that created a separate rb-tree for the retransmit queue:
> > 
> > commit 75c119afe14f74b4dd967d75ed9f57ab6c0ef045
> > Author: Eric Dumazet <edumazet@google.com>
> > Date:   Thu Oct 5 22:21:27 2017 -0700
> > 
> >     tcp: implement rb-tree based retransmit queue
> > 
> > I can backport the portion of this change which basically does this:
> > 
> > +enum tcp_queue {
> > +       TCP_FRAG_IN_WRITE_QUEUE,
> > +       TCP_FRAG_IN_RTX_QUEUE,
> > +};
> > +int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
> > +                struct sk_buff *skb, u32 len,
> > +                unsigned int mss_now, gfp_t gfp);
> > 
> > and the corresponding call-sites of tcp_fragment(). If we do that then
> > Eric's fix (b6653b3629e5b88202be3c9abc44713973f5c4b4) should apply cleanly
> > on top of linux-4.14.y. I'm happy to do that if you'd rather go that route.
> 
> Yes, that is what I was thinking of, thanks.  You expressed it much
> better than I could have before my morning coffee :)
> 
> > If you want the full rb-tree change into 4.14 then I would defer that to
> > Eric, but would argue that IMHO is probably too invasive of a change for a
> > LTS kernel.
> 
> No, I don't think we should do that work for 4.14.
> 
> Also, your change would be suitable for backporting to the older stable
> kernels that also need this (4.9.y and 4.4.y.)

Never mind this; in another email thread Eric provided a simpler
patch, which I have now queued up for the stable kernel trees.

I'll probably just do a quick round of review/releases now for this
issue as people are hitting it already.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4.14] tcp: refine memory limit test in tcp_fragment()
  2019-06-26  8:29           ` Greg KH
@ 2019-06-26 16:28             ` Josh Hunt
  0 siblings, 0 replies; 11+ messages in thread
From: Josh Hunt @ 2019-06-26 16:28 UTC (permalink / raw)
  To: Greg KH; +Cc: Sasha Levin, edumazet, stable, jbaron

On 6/26/19 1:29 AM, Greg KH wrote:
> 
> Nevermind with this, in another email thread Eric provided a simpler
> patch which I have now just queued up to the stable kernel trees.
> 
> I'll probably just do a quick round of review/releases now for this
> issue as people are hitting it already.
> 

Sounds great. Thanks Eric!

Josh

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-06-26 16:32 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-25 17:19 [PATCH 4.14] tcp: refine memory limit test in tcp_fragment() Josh Hunt
2019-06-25 20:26 ` Sasha Levin
2019-06-25 20:29   ` Josh Hunt
2019-06-25 22:18     ` Greg KH
2019-06-25 22:49       ` Josh Hunt
2019-06-26  0:48         ` Greg KH
2019-06-26  8:29           ` Greg KH
2019-06-26 16:28             ` Josh Hunt
2019-06-25 22:40     ` Sasha Levin
2019-06-25 22:53       ` Josh Hunt
2019-06-26  0:50       ` Greg KH

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.