All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] tcp: better TCP_SKB_CB layout
@ 2014-09-22 23:50 Eric Dumazet
  2014-09-22 23:50 ` [PATCH net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Eric Dumazet @ 2014-09-22 23:50 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Yuchung Cheng, Neal Cardwell, Eric Dumazet

TCP had the assumption that IPCB and IP6CB are first members of skb->cb[]

This is fine, except that IPCB/IP6CB are used for a very short time
in input path.

What really matters for TCP stack is to get skb->next,
TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq in the same cache line.

skb that are immediately consumed do not care because whole skb->cb[] is
hot in cpu cache, while skb that sit in wocket write queue or receive queues
do not need TCP_SKB_CB(skb)->header at all.

This patch set implements the prereq for IPv4, IPv6, and TCP to make this
possible. This makes TCP more efficient.

Eric Dumazet (3):
  ipv4: rename ip_options_echo to __ip_options_echo()
  ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted()
  tcp: better TCP_SKB_CB layout to reduce cache line misses

 include/net/ip.h      | 15 ++++++++++++---
 include/net/ipv6.h    |  3 ++-
 include/net/tcp.h     | 12 ++++++------
 net/dccp/ipv6.c       |  2 +-
 net/ipv4/ip_options.c |  6 ++----
 net/ipv4/ip_output.c  |  8 +++++---
 net/ipv4/tcp_ipv4.c   | 17 +++++++++++++----
 net/ipv6/af_inet6.c   |  4 ++--
 net/ipv6/syncookies.c |  2 +-
 net/ipv6/tcp_ipv6.c   | 12 ++++++++++--
 10 files changed, 54 insertions(+), 27 deletions(-)

-- 
2.1.0.rc2.206.gedb03e5

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo()
  2014-09-22 23:50 [PATCH net-next 0/3] tcp: better TCP_SKB_CB layout Eric Dumazet
@ 2014-09-22 23:50 ` Eric Dumazet
  2014-09-22 23:50 ` [PATCH net-next 2/3] ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted() Eric Dumazet
  2014-09-22 23:50 ` [PATCH net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses Eric Dumazet
  2 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2014-09-22 23:50 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Yuchung Cheng, Neal Cardwell, Eric Dumazet

ip_options_echo() assumes struct ip_options is provided in &IPCB(skb)->opt
Lets break this assumption, but provide a helper to not change all call points.

ip_send_unicast_reply() gets a new struct ip_options pointer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/ip.h      | 15 ++++++++++++---
 net/ipv4/ip_options.c |  6 ++----
 net/ipv4/ip_output.c  |  8 +++++---
 net/ipv4/tcp_ipv4.c   | 10 ++++++----
 4 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 14bfc8e1bcf9..c90e3357d589 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -180,8 +180,10 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
 	return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
 }
 
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
-			   __be32 saddr, const struct ip_reply_arg *arg,
+void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+			   const struct ip_options *sopt,
+			   __be32 daddr, __be32 saddr,
+			   const struct ip_reply_arg *arg,
 			   unsigned int len);
 
 #define IP_INC_STATS(net, field)	SNMP_INC_STATS64((net)->mib.ip_statistics, field)
@@ -511,7 +513,14 @@ int ip_forward(struct sk_buff *skb);
  
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
 		      __be32 daddr, struct rtable *rt, int is_frag);
-int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
+
+int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
+		      const struct ip_options *sopt);
+static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+{
+	return __ip_options_echo(dopt, skb, &IPCB(skb)->opt);
+}
+
 void ip_options_fragment(struct sk_buff *skb);
 int ip_options_compile(struct net *net, struct ip_options *opt,
 		       struct sk_buff *skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ad382499bace..5b3d91be2db0 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -87,17 +87,15 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
  * NOTE: dopt cannot point to skb.
  */
 
-int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
+		      const struct ip_options *sopt)
 {
-	const struct ip_options *sopt;
 	unsigned char *sptr, *dptr;
 	int soffset, doffset;
 	int	optlen;
 
 	memset(dopt, 0, sizeof(struct ip_options));
 
-	sopt = &(IPCB(skb)->opt);
-
 	if (sopt->optlen == 0)
 		return 0;
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 215af2b155cb..c8fa62476461 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1522,8 +1522,10 @@ static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
 	.uc_ttl		= -1,
 };
 
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
-			   __be32 saddr, const struct ip_reply_arg *arg,
+void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+			   const struct ip_options *sopt,
+			   __be32 daddr, __be32 saddr,
+			   const struct ip_reply_arg *arg,
 			   unsigned int len)
 {
 	struct ip_options_data replyopts;
@@ -1534,7 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 	struct sock *sk;
 	struct inet_sock *inet;
 
-	if (ip_options_echo(&replyopts.opt.opt, skb))
+	if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
 		return;
 
 	ipc.addr = daddr;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3b2e49cb2b61..28ab90382c01 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -681,8 +681,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 	net = dev_net(skb_dst(skb)->dev);
 	arg.tos = ip_hdr(skb)->tos;
-	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
-			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+			      &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -764,8 +765,9 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 	if (oif)
 		arg.bound_dev_if = oif;
 	arg.tos = tos;
-	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
-			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+			      &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 }
-- 
2.1.0.rc2.206.gedb03e5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH net-next 2/3] ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted()
  2014-09-22 23:50 [PATCH net-next 0/3] tcp: better TCP_SKB_CB layout Eric Dumazet
  2014-09-22 23:50 ` [PATCH net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
@ 2014-09-22 23:50 ` Eric Dumazet
  2014-09-22 23:50 ` [PATCH net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses Eric Dumazet
  2 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2014-09-22 23:50 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Yuchung Cheng, Neal Cardwell, Eric Dumazet

ipv6_opt_accepted() assumes IP6CB(skb) holds the struct inet6_skb_parm
that it needs. Lets not assume this, as TCP stack might use a different
place.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/ipv6.h    | 3 ++-
 net/dccp/ipv6.c       | 2 +-
 net/ipv6/af_inet6.c   | 4 ++--
 net/ipv6/syncookies.c | 2 +-
 net/ipv6/tcp_ipv6.c   | 5 +++--
 5 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7e247e9b8765..97f472012438 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -288,7 +288,8 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
 struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 					  struct ipv6_txoptions *opt);
 
-bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb);
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
+		       const struct inet6_skb_parm *opt);
 
 static inline bool ipv6_accept_ra(struct inet6_dev *idev)
 {
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 04cb17d4b0ce..ad2acfe1ca61 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -404,7 +404,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 
-	if (ipv6_opt_accepted(sk, skb) ||
+	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
 		atomic_inc(&skb->users);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e4865a3ebe1d..34f726f59814 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -672,10 +672,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
 
-bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
+		       const struct inet6_skb_parm *opt)
 {
 	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct inet6_skb_parm *opt = IP6CB(skb);
 
 	if (np->rxopt.all) {
 		if ((opt->hop && (np->rxopt.bits.hopopts ||
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c643dc907ce7..9a2838e93cc5 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -203,7 +203,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq->ir_num = ntohs(th->dest);
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
-	if (ipv6_opt_accepted(sk, skb) ||
+	if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
 		atomic_inc(&skb->users);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index de51a88bec6f..9400b4326f22 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -742,7 +742,8 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
 		ireq->ir_iif = inet6_iif(skb);
 
 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
-	    (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo ||
+	    (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
+	     np->rxopt.bits.rxinfo ||
 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
 	     np->rxopt.bits.rxohlim || np->repflow)) {
 		atomic_inc(&skb->users);
@@ -1367,7 +1368,7 @@ ipv6_pktoptions:
 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
 		if (np->repflow)
 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
-		if (ipv6_opt_accepted(sk, opt_skb)) {
+		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
 			skb_set_owner_r(opt_skb, sk);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
 		} else {
-- 
2.1.0.rc2.206.gedb03e5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses
  2014-09-22 23:50 [PATCH net-next 0/3] tcp: better TCP_SKB_CB layout Eric Dumazet
  2014-09-22 23:50 ` [PATCH net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
  2014-09-22 23:50 ` [PATCH net-next 2/3] ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted() Eric Dumazet
@ 2014-09-22 23:50 ` Eric Dumazet
  2014-09-23  7:20   ` Christoph Paasch
  2 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2014-09-22 23:50 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Yuchung Cheng, Neal Cardwell, Eric Dumazet

TCP maintains lists of skb in write queue, and in receive queues
(in order and out of order queues)

Scanning these lists both in input and output path usually requires
access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq

These fields are currently in two different cache lines, meaning we
waste lot of memory bandwidth when these queues are big and flows
have either packet drops or packet reorders.

We can move TCP_SKB_CB(skb)->header at the end of TCP_SKB_CB, because
this header is not used in fast path. This allows TCP to search much faster
in the skb lists.

Even with regular flows, we save one cache line miss in fast path.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/tcp.h   | 12 ++++++------
 net/ipv4/tcp_ipv4.c |  7 +++++++
 net/ipv6/tcp_ipv6.c |  7 +++++++
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a4201ef216e8..4dc6641ee990 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -696,12 +696,6 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
  * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
  */
 struct tcp_skb_cb {
-	union {
-		struct inet_skb_parm	h4;
-#if IS_ENABLED(CONFIG_IPV6)
-		struct inet6_skb_parm	h6;
-#endif
-	} header;	/* For incoming frames		*/
 	__u32		seq;		/* Starting sequence number	*/
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
 	__u32		tcp_tw_isn;	/* isn chosen by tcp_timewait_state_process() */
@@ -720,6 +714,12 @@ struct tcp_skb_cb {
 	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
 	/* 1 byte hole */
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
+	union {
+		struct inet_skb_parm	h4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct inet6_skb_parm	h6;
+#endif
+	} header;	/* For incoming frames		*/
 };
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 28ab90382c01..70c4a21f6f45 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1636,6 +1636,13 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 	th = tcp_hdr(skb);
 	iph = ip_hdr(skb);
+	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+	 * barrier() makes sure compiler wont play fool^Waliasing games.
+	 */
+	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+		sizeof(struct inet_skb_parm));
+	barrier();
+
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff * 4);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9400b4326f22..132bac137aed 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1412,6 +1412,13 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 
 	th = tcp_hdr(skb);
 	hdr = ipv6_hdr(skb);
+	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+	 * barrier() makes sure compiler wont play fool^Waliasing games.
+	 */
+	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+		sizeof(struct inet6_skb_parm));
+	barrier();
+
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
-- 
2.1.0.rc2.206.gedb03e5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses
  2014-09-22 23:50 ` [PATCH net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses Eric Dumazet
@ 2014-09-23  7:20   ` Christoph Paasch
  2014-09-23  7:36     ` Christoph Paasch
  0 siblings, 1 reply; 7+ messages in thread
From: Christoph Paasch @ 2014-09-23  7:20 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S. Miller, netdev, Yuchung Cheng, Neal Cardwell

Hello Eric,

On 22/09/14 - 16:50:44, Eric Dumazet wrote:
> TCP maintains lists of skb in write queue, and in receive queues
> (in order and out of order queues)
> 
> Scanning these lists both in input and output path usually requires
> access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq
> 
> These fields are currently in two different cache lines, meaning we
> waste lot of memory bandwidth when these queues are big and flows
> have either packet drops or packet reorders.
> 
> We can move TCP_SKB_CB(skb)->header at the end of TCP_SKB_CB, because
> this header is not used in fast path. This allows TCP to search much faster
> in the skb lists.
> 
> Even with regular flows, we save one cache line miss in fast path.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/tcp.h   | 12 ++++++------
>  net/ipv4/tcp_ipv4.c |  7 +++++++
>  net/ipv6/tcp_ipv6.c |  7 +++++++
>  3 files changed, 20 insertions(+), 6 deletions(-)

doesn't also the tx-path relies on IPCB(skb) being memset to 0 (e.g., in
xfrm4_output or ip_options_build)

Or is it somewhere already explicitly memset to 0? (didn't found this place
in the code though)


Cheers,
Christoph

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses
  2014-09-23  7:20   ` Christoph Paasch
@ 2014-09-23  7:36     ` Christoph Paasch
  2014-09-23  9:09       ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Christoph Paasch @ 2014-09-23  7:36 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S. Miller, netdev, Yuchung Cheng, Neal Cardwell

On 23/09/14 - 09:20:16, Christoph Paasch wrote:
> Hello Eric,
> 
> On 22/09/14 - 16:50:44, Eric Dumazet wrote:
> > TCP maintains lists of skb in write queue, and in receive queues
> > (in order and out of order queues)
> > 
> > Scanning these lists both in input and output path usually requires
> > access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq
> > 
> > These fields are currently in two different cache lines, meaning we
> > waste lot of memory bandwidth when these queues are big and flows
> > have either packet drops or packet reorders.
> > 
> > We can move TCP_SKB_CB(skb)->header at the end of TCP_SKB_CB, because
> > this header is not used in fast path. This allows TCP to search much faster
> > in the skb lists.
> > 
> > Even with regular flows, we save one cache line miss in fast path.
> > 
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  include/net/tcp.h   | 12 ++++++------
> >  net/ipv4/tcp_ipv4.c |  7 +++++++
> >  net/ipv6/tcp_ipv6.c |  7 +++++++
> >  3 files changed, 20 insertions(+), 6 deletions(-)
> 
> doesn't also the tx-path relies on IPCB(skb) being memset to 0 (e.g., in
> xfrm4_output or ip_options_build)

Crap, forget about ip_options_build. It is memcpy'ing into IPCB... Confused
src with dest... :-)

Nevertheless, wouldn't there be a problem in xfrm4_output?


Cheers,
Christoph

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses
  2014-09-23  7:36     ` Christoph Paasch
@ 2014-09-23  9:09       ` Eric Dumazet
  0 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2014-09-23  9:09 UTC (permalink / raw)
  To: Christoph Paasch
  Cc: Eric Dumazet, David S. Miller, netdev, Yuchung Cheng, Neal Cardwell

On Tue, 2014-09-23 at 09:36 +0200, Christoph Paasch wrote:
> On 23/09/14 - 09:20:16, Christoph Paasch wrote:
> > Hello Eric,
...
> > doesn't also the tx-path relies on IPCB(skb) being memset to 0 (e.g., in
> > xfrm4_output or ip_options_build)
> 
> Crap, forget about ip_options_build. It is memcpy'ing into IPCB... Confused
> src with dest... :-)
> 
> Nevertheless, wouldn't there be a problem in xfrm4_output?

Hi Christoph.

I believe you're right. I'll include a memset() in v2.
 
Thanks !

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-09-23  9:09 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-22 23:50 [PATCH net-next 0/3] tcp: better TCP_SKB_CB layout Eric Dumazet
2014-09-22 23:50 ` [PATCH net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
2014-09-22 23:50 ` [PATCH net-next 2/3] ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted() Eric Dumazet
2014-09-22 23:50 ` [PATCH net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses Eric Dumazet
2014-09-23  7:20   ` Christoph Paasch
2014-09-23  7:36     ` Christoph Paasch
2014-09-23  9:09       ` Eric Dumazet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.