* [RFC] fclone layout suboptimal
From: Eric Dumazet @ 2014-09-26 13:07 UTC
  To: David Miller; +Cc: netdev

Fast clones have the following layout:

[sk_buff 1]
[sk_buff 2]
[atomic_t fclone_ref]
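
For illustration, today's layout is equivalent to the following struct
(hypothetical name, just a sketch; mainline defines no such struct and
instead reaches the pieces with pointer arithmetic, skb + 1 and skb + 2):

struct fclone_layout_today {            /* hypothetical, for illustration */
        struct sk_buff  skb1;           /* SKB_FCLONE_ORIG, stays in the TCP write queue */
        struct sk_buff  skb2;           /* SKB_FCLONE_CLONE, handed down for transmit */
        atomic_t        fclone_ref;     /* shared refcount, placed after both sk_buffs */
};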


The main consumer is the TCP stack, for its write queue.

When tcp_ack()/tcp_clean_rtx_queue() frees an skb,
kfree_skbmem() needs to fetch a cold cache line:

  0.72 │      je     b0
       │6f:   add    $0x8,%rsp
       │      pop    %rbx
  0.07 │      pop    %rbp
       │      retq
       │      nop
  0.52 │80:   lock   decl   0x1b0(%rbx)
 90.23 │   ┌──je     90
       │   │  jmp    6f
       │   │  nop
       │90:└─ mov    0x5c4e29(%rip),%rdi
       │      mov    %rbx,%rsi
       │      callq  kmem_cache_free
  1.37 │      add    $0x8,%rsp
       │      pop    %rbx
       │      pop    %rbp
  0.91 │      retq
       │      nop
       │b0:   mov    0x5c4e09(%rip),%rdi
       │      lea    -0xd8(%rbx),%rsi
       │      callq  kmem_cache_free
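
The hot "lock decl 0x1b0(%rbx)" is the atomic_dec_and_test() of the
SKB_FCLONE_ORIG branch (0x1b0 = 2 * 0xd8, i.e. skb + 2 for the
0xd8-byte sk_buff visible in the listing above). For reference, that
branch of kfree_skbmem() currently reads (these are the lines the
patch below removes):

        case SKB_FCLONE_ORIG:
                /* fclone_ref lives after both sk_buffs, i.e. in a cache
                 * line the TCP free path has not touched yet.
                 */
                fclone_ref = (atomic_t *) (skb + 2);
                if (atomic_dec_and_test(fclone_ref))
                        kmem_cache_free(skbuff_fclone_cache, skb);
                break;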

It might be better to have:

[sk_buff skb1]
[atomic_t fclone_ref]
[sk_buff skb2]

__alloc_skb() would not have to dirty a cache line just to perform the
atomic_set(fclone_ref, 1).
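
For reference, the current SKB_ALLOC_FCLONE block in __alloc_skb()
(the lines the patch below replaces) is:

                struct sk_buff *child = skb + 1;
                atomic_t *fclone_ref = (atomic_t *) (child + 1);

                kmemcheck_annotate_bitfield(child, flags1);
                kmemcheck_annotate_bitfield(child, flags2);
                skb->fclone = SKB_FCLONE_ORIG;
                /* this store dirties a cache line nothing else here touches */
                atomic_set(fclone_ref, 1);

                child->fclone = SKB_FCLONE_UNAVAILABLE;
                child->pfmemalloc = pfmemalloc;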

kfree_skbmem() would do the atomic_dec_and_test() on a hot cache line
(because we accessed skb_shinfo() a bit earlier, while doing
skb_release_all()).

When TX completion has to free the cloned sk_buff, fetching fclone_ref
would use an already hot cache line as well (skb2->next / skb2->sk are
already in the CPU cache).


* Re: [RFC] fclone layout suboptimal
From: Eric Dumazet @ 2014-09-26 13:35 UTC
  To: David Miller; +Cc: netdev

On Fri, 2014-09-26 at 06:07 -0700, Eric Dumazet wrote:

> [sk_buff skb1]
> [atomic_t fclone_ref]
> [sk_buff skb2]

An untested patch would be:

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index da1378a3e2c7..e39f03ea1822 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -178,6 +178,15 @@ out:
 	return skb;
 }
 
+/* Layout of fast clones : [skb1][fclone_ref][skb2] */
+struct sk_buff_fclones {
+	struct sk_buff	skb1;
+
+	atomic_t	fclone_ref;
+
+	struct sk_buff	skb2;
+};
+
 /**
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
@@ -257,16 +266,17 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (flags & SKB_ALLOC_FCLONE) {
-		struct sk_buff *child = skb + 1;
-		atomic_t *fclone_ref = (atomic_t *) (child + 1);
+		struct sk_buff_fclones *fclones;
+
+		fclones = (struct sk_buff_fclones *)skb;
 
-		kmemcheck_annotate_bitfield(child, flags1);
-		kmemcheck_annotate_bitfield(child, flags2);
-		skb->fclone = SKB_FCLONE_ORIG;
-		atomic_set(fclone_ref, 1);
+		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
+		kmemcheck_annotate_bitfield(&fclones->skb2, flags2);
+		fclones->skb1.fclone = SKB_FCLONE_ORIG;
+		atomic_set(&fclones->fclone_ref, 1);
 
-		child->fclone = SKB_FCLONE_UNAVAILABLE;
-		child->pfmemalloc = pfmemalloc;
+		fclones->skb2.fclone = SKB_FCLONE_UNAVAILABLE;
+		fclones->skb2.pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
@@ -524,8 +534,7 @@ static void skb_release_data(struct sk_buff *skb)
  */
 static void kfree_skbmem(struct sk_buff *skb)
 {
-	struct sk_buff *other;
-	atomic_t *fclone_ref;
+	struct sk_buff_fclones *fclones;
 
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
@@ -533,22 +542,21 @@ static void kfree_skbmem(struct sk_buff *skb)
 		break;
 
 	case SKB_FCLONE_ORIG:
-		fclone_ref = (atomic_t *) (skb + 2);
-		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, skb);
+		fclones = container_of(skb, struct sk_buff_fclones, skb1);
+		if (atomic_dec_and_test(&fclones->fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, fclones);
 		break;
 
 	case SKB_FCLONE_CLONE:
-		fclone_ref = (atomic_t *) (skb + 1);
-		other = skb - 1;
+		fclones = container_of(skb, struct sk_buff_fclones, skb2);
 
 		/* The clone portion is available for
 		 * fast-cloning again.
 		 */
 		skb->fclone = SKB_FCLONE_UNAVAILABLE;
 
-		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, other);
+		if (atomic_dec_and_test(&fclones->fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, fclones);
 		break;
 	}
 }
@@ -855,17 +863,16 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs);
 
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	struct sk_buff *n;
+	struct sk_buff_fclones *fclones = (struct sk_buff_fclones *)skb;
+	struct sk_buff *n = &fclones->skb2;
 
 	if (skb_orphan_frags(skb, gfp_mask))
 		return NULL;
 
-	n = skb + 1;
 	if (skb->fclone == SKB_FCLONE_ORIG &&
 	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
-		atomic_t *fclone_ref = (atomic_t *) (n + 1);
 		n->fclone = SKB_FCLONE_CLONE;
-		atomic_inc(fclone_ref);
+		atomic_inc(&fclones->fclone_ref);
 	} else {
 		if (skb_pfmemalloc(skb))
 			gfp_mask |= __GFP_MEMALLOC;
