All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb()
@ 2015-03-23 10:38 Alexey Kodanev
  2015-03-23 13:52 ` Eric Dumazet
  2015-03-23 14:38 ` Eric Dumazet
  0 siblings, 2 replies; 6+ messages in thread
From: Alexey Kodanev @ 2015-03-23 10:38 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, Alexey Kodanev

Regression introduced by commit 2dc49d1680.

tcp_v6_fill_cb() will be called twice if socket's state changes from
TCP_TIME_WAIT to TCP_LISTEN. That can result in performance loss and
control buffer data corruption because in the second tcp_v6_fill_cb()
it's not copying the 'header' anymore, but 'seq', 'end_seq', etc.

Reproduced with LTP/tcp_fastopen test and netperf -t TCP_CRR, so when
we're constantly closing and opening TCP connections after several
requests/responses from client.

This can be fixed if we move 'header' union back to the beginning of
'struct tcp_skb_cb' and getting skb->next, TCP_SKB_CB(skb)->seq and
TCP_SKB_CB(skb)->end_seq on the same cache line by moving 'cb[48]'
closer to 'skb->next', above the *sk and *dev pointers.

Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---
 include/linux/skbuff.h |    5 +++--
 include/net/tcp.h      |   17 ++++++++---------
 net/ipv4/tcp_ipv4.c    |    6 ------
 net/ipv4/tcp_output.c  |    4 ----
 net/ipv6/tcp_ipv6.c    |   21 ++++-----------------
 5 files changed, 15 insertions(+), 38 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f54d665..8870d16 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -524,8 +524,6 @@ struct sk_buff {
 		};
 		struct rb_node	rbnode; /* used in netem & tcp stack */
 	};
-	struct sock		*sk;
-	struct net_device	*dev;
 
 	/*
 	 * This is the control buffer. It is free to use for every
@@ -535,6 +533,9 @@ struct sk_buff {
 	 */
 	char			cb[48] __aligned(8);
 
+	struct sock		*sk;
+	struct net_device	*dev;
+
 	unsigned long		_skb_refdst;
 	void			(*destructor)(struct sk_buff *skb);
 #ifdef CONFIG_XFRM
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8d6b983..1a5cf12 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -694,6 +694,13 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
  * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
  */
 struct tcp_skb_cb {
+	union {
+		struct inet_skb_parm	h4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct inet6_skb_parm	h6;
+#endif
+	} header;	/* For incoming frames		*/
+
 	__u32		seq;		/* Starting sequence number	*/
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
 	union {
@@ -721,21 +728,13 @@ struct tcp_skb_cb {
 	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
 	/* 1 byte hole */
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
-	union {
-		struct inet_skb_parm	h4;
-#if IS_ENABLED(CONFIG_IPV6)
-		struct inet6_skb_parm	h6;
-#endif
-	} header;	/* For incoming frames		*/
 };
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
 
 #if IS_ENABLED(CONFIG_IPV6)
-/* This is the variant of inet6_iif() that must be used by TCP,
- * as TCP moves IP6CB into a different location in skb->cb[]
- */
+/* This is the variant of inet6_iif() that must be used by TCP */
 static inline int tcp_v6_iif(const struct sk_buff *skb)
 {
 	return TCP_SKB_CB(skb)->header.h6.iif;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a2dfed..43d6cd2 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1622,12 +1622,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 	th = tcp_hdr(skb);
 	iph = ip_hdr(skb);
-	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
-	 * barrier() makes sure compiler wont play fool^Waliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
-		sizeof(struct inet_skb_parm));
-	barrier();
 
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c..7de50d0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1011,10 +1011,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	/* Our usage of tstamp should remain private */
 	skb->tstamp.tv64 = 0;
 
-	/* Cleanup our debris for IP stacks */
-	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
-			       sizeof(struct inet6_skb_parm)));
-
 	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
 
 	if (likely(err <= 0))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5d46832..e8cc440 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1392,15 +1392,6 @@ ipv6_pktoptions:
 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
 			   const struct tcphdr *th)
 {
-	/* This is tricky: we move IP6CB at its correct location into
-	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
-	 * _decode_session6() uses IP6CB().
-	 * barrier() makes sure compiler won't play aliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
-		sizeof(struct inet6_skb_parm));
-	barrier();
-
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
@@ -1443,8 +1434,10 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	th = tcp_hdr(skb);
 	hdr = ipv6_hdr(skb);
 
+	tcp_v6_fill_cb(skb, hdr, th);
+
 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
-				inet6_iif(skb));
+				tcp_v6_iif(skb));
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1460,8 +1453,6 @@ process:
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-	tcp_v6_fill_cb(skb, hdr, th);
-
 #ifdef CONFIG_TCP_MD5SIG
 	if (tcp_v6_inbound_md5_hash(sk, skb))
 		goto discard_and_relse;
@@ -1493,8 +1484,6 @@ no_tcp_socket:
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
-	tcp_v6_fill_cb(skb, hdr, th);
-
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 csum_error:
 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
@@ -1518,8 +1507,6 @@ do_time_wait:
 		goto discard_it;
 	}
 
-	tcp_v6_fill_cb(skb, hdr, th);
-
 	if (skb->len < (th->doff<<2)) {
 		inet_twsk_put(inet_twsk(sk));
 		goto bad_packet;
@@ -1538,7 +1525,7 @@ do_time_wait:
 					    &ipv6_hdr(skb)->saddr, th->source,
 					    &ipv6_hdr(skb)->daddr,
 					    ntohs(th->dest), tcp_v6_iif(skb));
-		if (sk2 != NULL) {
+		if (sk2) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
 			inet_twsk_deschedule(tw, &tcp_death_row);
 			inet_twsk_put(tw);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb()
  2015-03-23 10:38 [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb() Alexey Kodanev
@ 2015-03-23 13:52 ` Eric Dumazet
  2015-03-25 21:25   ` Alexey Kodanev
  2015-03-23 14:38 ` Eric Dumazet
  1 sibling, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2015-03-23 13:52 UTC (permalink / raw)
  To: Alexey Kodanev; +Cc: netdev, linux-kernel

On Mon, 2015-03-23 at 13:38 +0300, Alexey Kodanev wrote:
> Regression introduced by commit 2dc49d1680.
> 
> tcp_v6_fill_cb() will be called twice if socket's state changes from
> TCP_TIME_WAIT to TCP_LISTEN. That can result in performance loss and
> control buffer data corruption because in the second tcp_v6_fill_cb()
> it's not copying the 'header' anymore, but 'seq', 'end_seq', etc.
> 
> Reproduced with LTP/tcp_fastopen test and netperf -t TCP_CRR, so when
> we're constantly closing and opening TCP connections after several
> requests/responses from client.
> 
> This can be fixed if we move 'header' union back to the beginning of
> 'struct tcp_skb_cb' and getting skb->next, TCP_SKB_CB(skb)->seq and
> TCP_SKB_CB(skb)->end_seq on the same cache line by moving 'cb[48]'
> closer to 'skb->next', above the *sk and *dev pointers.


NACK.  DO not change sk and dev pointers.

Fix the bug yes, do not change skb layout so radically, it will have
serious performance impact for other part of the stack.




^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb()
  2015-03-23 10:38 [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb() Alexey Kodanev
  2015-03-23 13:52 ` Eric Dumazet
@ 2015-03-23 14:38 ` Eric Dumazet
  2015-03-23 15:50   ` Alexey Kodanev
  1 sibling, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2015-03-23 14:38 UTC (permalink / raw)
  To: Alexey Kodanev; +Cc: netdev, linux-kernel

On Mon, 2015-03-23 at 13:38 +0300, Alexey Kodanev wrote:
\
> @@ -1538,7 +1525,7 @@ do_time_wait:
>  					    &ipv6_hdr(skb)->saddr, th->source,
>  					    &ipv6_hdr(skb)->daddr,
>  					    ntohs(th->dest), tcp_v6_iif(skb));
> -		if (sk2 != NULL) {
> +		if (sk2) {
>  			struct inet_timewait_sock *tw = inet_twsk(sk);
>  			inet_twsk_deschedule(tw, &tcp_death_row);
>  			inet_twsk_put(tw);

And please refrain adding such cleanup changes in a bug fix.

Thanks !



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb()
  2015-03-23 14:38 ` Eric Dumazet
@ 2015-03-23 15:50   ` Alexey Kodanev
  0 siblings, 0 replies; 6+ messages in thread
From: Alexey Kodanev @ 2015-03-23 15:50 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, linux-kernel

Hi Eric,
On 03/23/2015 05:38 PM, Eric Dumazet wrote:
> On Mon, 2015-03-23 at 13:38 +0300, Alexey Kodanev wrote:
> \
>> @@ -1538,7 +1525,7 @@ do_time_wait:
>>   					    &ipv6_hdr(skb)->saddr, th->source,
>>   					    &ipv6_hdr(skb)->daddr,
>>   					    ntohs(th->dest), tcp_v6_iif(skb));
>> -		if (sk2 != NULL) {
>> +		if (sk2) {
>>   			struct inet_timewait_sock *tw = inet_twsk(sk);
>>   			inet_twsk_deschedule(tw, &tcp_death_row);
>>   			inet_twsk_put(tw);
> And please refrain adding such cleanup changes in a bug fix.

Sure, I will. Thank you for review!

Best regards,
Alexey

> Thanks !
>
>


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb()
  2015-03-23 13:52 ` Eric Dumazet
@ 2015-03-25 21:25   ` Alexey Kodanev
  2015-03-25 22:26     ` Eric Dumazet
  0 siblings, 1 reply; 6+ messages in thread
From: Alexey Kodanev @ 2015-03-25 21:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, linux-kernel

Hi!
On 03/23/2015 04:52 PM, Eric Dumazet wrote:
> On Mon, 2015-03-23 at 13:38 +0300, Alexey Kodanev wrote:
>> Regression introduced by commit 2dc49d1680.
>>
>> tcp_v6_fill_cb() will be called twice if socket's state changes from
>> TCP_TIME_WAIT to TCP_LISTEN. That can result in performance loss and
>> control buffer data corruption because in the second tcp_v6_fill_cb()
>> it's not copying the 'header' anymore, but 'seq', 'end_seq', etc.
>>
>> Reproduced with LTP/tcp_fastopen test and netperf -t TCP_CRR, so when
>> we're constantly closing and opening TCP connections after several
>> requests/responses from client.
>>
>> This can be fixed if we move 'header' union back to the beginning of
>> 'struct tcp_skb_cb' and getting skb->next, TCP_SKB_CB(skb)->seq and
>> TCP_SKB_CB(skb)->end_seq on the same cache line by moving 'cb[48]'
>> closer to 'skb->next', above the *sk and *dev pointers.
>
> NACK.  DO not change sk and dev pointers.
>
> Fix the bug yes, do not change skb layout so radically, it will have
> serious performance impact for other part of the stack.

Did various testing with exactly that change and hadn't found any 
"serious performance impact" on the stack (at least on my configuration).

Alright, what about other part of the patch, is it acceptable?

I came up with one more solution... not so radical :)
We can restore inet6_skb_parm with

     memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, ...)

in the two places in tcp_v6_rcv(), before xfrm6_policy_check() + 
tcp_v6_fill_cb() are called again:

  static int tcp_v6_rcv(struct sk_buff *skb)
  {
         const struct tcphdr *th;
@@ -1543,6 +1552,7 @@ do_time_wait:
                         inet_twsk_deschedule(tw, &tcp_death_row);
                         inet_twsk_put(tw);
                         sk = sk2;
+                       tcp_v6_restore_cb(skb);
                         goto process;
                 }
                 /* Fall through to ACK */
@@ -1551,6 +1561,7 @@ do_time_wait:
                 tcp_v6_timewait_ack(sk, skb);
                 break;
         case TCP_TW_RST:
+               tcp_v6_restore_cb(skb);
                 goto no_tcp_socket;
         case TCP_TW_SUCCESS:


Thanks,
Alexey


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb()
  2015-03-25 21:25   ` Alexey Kodanev
@ 2015-03-25 22:26     ` Eric Dumazet
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Dumazet @ 2015-03-25 22:26 UTC (permalink / raw)
  To: Alexey Kodanev; +Cc: netdev

On Thu, 2015-03-26 at 00:25 +0300, Alexey Kodanev wrote:

> Did various testing with exactly that change and hadn't found any 
> "serious performance impact" on the stack (at least on my configuration).

Sure, but nobody will take the risk to make such change without serious
tests on multiple workloads and arches.

> 
> Alright, what about other part of the patch, is it acceptable?
> 
> I came up with one more solution... not so radical :)
> We can restore inet6_skb_parm with
> 
>      memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, ...)
> 
> in the two places in tcp_v6_rcv(), before xfrm6_policy_check() + 
> tcp_v6_fill_cb() are called again:
> 
>   static int tcp_v6_rcv(struct sk_buff *skb)
>   {
>          const struct tcphdr *th;
> @@ -1543,6 +1552,7 @@ do_time_wait:
>                          inet_twsk_deschedule(tw, &tcp_death_row);
>                          inet_twsk_put(tw);
>                          sk = sk2;
> +                       tcp_v6_restore_cb(skb);
>                          goto process;
>                  }
>                  /* Fall through to ACK */
> @@ -1551,6 +1561,7 @@ do_time_wait:
>                  tcp_v6_timewait_ack(sk, skb);
>                  break;
>          case TCP_TW_RST:
> +               tcp_v6_restore_cb(skb);
>                  goto no_tcp_socket;
>          case TCP_TW_SUCCESS:
> 

Looks good to me !

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-03-25 22:26 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-23 10:38 [PATCH] net: tcp6: fix double call of tcp_v6_fill_cb() Alexey Kodanev
2015-03-23 13:52 ` Eric Dumazet
2015-03-25 21:25   ` Alexey Kodanev
2015-03-25 22:26     ` Eric Dumazet
2015-03-23 14:38 ` Eric Dumazet
2015-03-23 15:50   ` Alexey Kodanev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.