All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jesper Dangaard Brouer <brouer@redhat.com>
To: netdev@vger.kernel.org, Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	tom@herbertland.com, Alexander Duyck <alexander.duyck@gmail.com>,
	alexei.starovoitov@gmail.com, linux-mm@kvack.org,
	Jesper Dangaard Brouer <brouer@redhat.com>,
	Christoph Lameter <cl@linux.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [net-next PATCH V2 1/3] net: bulk free infrastructure for NAPI context, use napi_consume_skb
Date: Mon, 08 Feb 2016 13:14:59 +0100	[thread overview]
Message-ID: <20160208121459.8860.85632.stgit@localhost> (raw)
In-Reply-To: <20160208121328.8860.67014.stgit@localhost>

Discovered that the network stack was hitting the kmem_cache/SLUB
slowpath when freeing SKBs.  Doing bulk free with kmem_cache_free_bulk()
can speed up this slowpath.

NAPI context is a bit special, so let's take advantage of that for bulk
freeing SKBs.

In NAPI context we are running in softirq, which gives us certain
protection.  A softirq can run on several CPUs at once.  BUT the
important part is a softirq will never preempt another softirq running
on the same CPU.  This gives us the opportunity to access per-cpu
variables in softirq context.

Extend napi_alloc_cache (which previously contained only a
page_frag_cache) to be a struct with a small array-based stack for
holding SKBs.  Introduce an SKB defer and flush API for accessing this.

Introduce napi_consume_skb() as a replacement for
e.g. dev_consume_skb_any() when running in NAPI context.  A small trick
to handle/detect if we are called from netpoll is to see if budget is 0.
In that case, we need to invoke dev_consume_skb_irq().

Joint work with Alexander Duyck.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
 include/linux/skbuff.h |    3 ++
 net/core/dev.c         |    1 +
 net/core/skbuff.c      |   83 +++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 11f935c1a090..3c8d348223d7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2399,6 +2399,9 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
 {
 	return __napi_alloc_skb(napi, length, GFP_ATOMIC);
 }
+void napi_consume_skb(struct sk_buff *skb, int budget);
+
+void __kfree_skb_flush(void);
 
 /**
  * __dev_alloc_pages - allocate page for network Rx
diff --git a/net/core/dev.c b/net/core/dev.c
index 8cba3d852f25..44384a8c9613 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5152,6 +5152,7 @@ static void net_rx_action(struct softirq_action *h)
 		}
 	}
 
+	__kfree_skb_flush();
 	local_irq_disable();
 
 	list_splice_tail_init(&sd->poll_list, &list);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b2df375ec9c2..e26bb2b1dba4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -347,8 +347,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
+#define NAPI_SKB_CACHE_SIZE	64
+
+struct napi_alloc_cache {
+	struct page_frag_cache page;
+	size_t skb_count;
+	void *skb_cache[NAPI_SKB_CACHE_SIZE];
+};
+
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
+static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
@@ -378,9 +386,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-	return __alloc_page_frag(nc, fragsz, gfp_mask);
+	return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -474,7 +482,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 				 gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 	struct sk_buff *skb;
 	void *data;
 
@@ -494,7 +502,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	data = __alloc_page_frag(nc, len, gfp_mask);
+	data = __alloc_page_frag(&nc->page, len, gfp_mask);
 	if (unlikely(!data))
 		return NULL;
 
@@ -505,7 +513,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	}
 
 	/* use OR instead of assignment to avoid clearing of bits in mask */
-	if (nc->pfmemalloc)
+	if (nc->page.pfmemalloc)
 		skb->pfmemalloc = 1;
 	skb->head_frag = 1;
 
@@ -747,6 +755,69 @@ void consume_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(consume_skb);
 
+void __kfree_skb_flush(void)
+{
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	/* flush skb_cache if containing objects */
+	if (nc->skb_count) {
+		kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
+				     nc->skb_cache);
+		nc->skb_count = 0;
+	}
+}
+
+static void __kfree_skb_defer(struct sk_buff *skb)
+{
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	/* drop skb->head and call any destructors for packet */
+	skb_release_all(skb);
+
+	/* record skb to CPU local list */
+	nc->skb_cache[nc->skb_count++] = skb;
+
+#ifdef CONFIG_SLUB
+	/* SLUB writes into objects when freeing */
+	prefetchw(skb);
+#endif
+
+	/* flush skb_cache if it is filled */
+	if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
+		kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
+				     nc->skb_cache);
+		nc->skb_count = 0;
+	}
+}
+
+void napi_consume_skb(struct sk_buff *skb, int budget)
+{
+	if (unlikely(!skb))
+		return;
+
+	/* if budget is 0 assume netpoll w/ IRQs disabled */
+	if (unlikely(!budget)) {
+		dev_consume_skb_irq(skb);
+		return;
+	}
+
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+	/* if reaching here SKB is ready to free */
+	trace_consume_skb(skb);
+
+	/* if SKB is a clone, don't handle this case */
+	if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	__kfree_skb_defer(skb);
+}
+EXPORT_SYMBOL(napi_consume_skb);
+
 /* Make sure a field is enclosed inside headers_start/headers_end section */
 #define CHECK_SKB_FIELD(field) \
 	BUILD_BUG_ON(offsetof(struct sk_buff, field) <		\

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2016-02-08 12:14 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-23 12:46 [PATCH 0/4] net: mitigating kmem_cache slowpath for network stack in NAPI context Jesper Dangaard Brouer
2015-10-23 12:46 ` Jesper Dangaard Brouer
2015-10-23 12:46 ` [PATCH 1/4] net: bulk free infrastructure for NAPI context, use napi_consume_skb Jesper Dangaard Brouer
2015-10-23 12:46   ` Jesper Dangaard Brouer
2015-10-23 12:46 ` [PATCH 2/4] net: bulk free SKBs that were delay free'ed due to IRQ context Jesper Dangaard Brouer
2015-10-23 12:46 ` [PATCH 3/4] ixgbe: bulk free SKBs during TX completion cleanup cycle Jesper Dangaard Brouer
2015-10-23 12:46   ` Jesper Dangaard Brouer
2015-10-23 12:46 ` [PATCH 4/4] net: bulk alloc and reuse of SKBs in NAPI context Jesper Dangaard Brouer
2015-10-27  1:09 ` [PATCH 0/4] net: mitigating kmem_cache slowpath for network stack " David Miller
2016-02-02 21:11 ` [net-next PATCH 00/11] net: mitigating kmem_cache slowpath and BoF discussion patches Jesper Dangaard Brouer
2016-02-02 21:11   ` [net-next PATCH 01/11] net: bulk free infrastructure for NAPI context, use napi_consume_skb Jesper Dangaard Brouer
2016-02-02 21:11   ` [net-next PATCH 02/11] net: bulk free SKBs that were delay free'ed due to IRQ context Jesper Dangaard Brouer
2016-02-02 21:11   ` [net-next PATCH 03/11] ixgbe: bulk free SKBs during TX completion cleanup cycle Jesper Dangaard Brouer
2016-02-02 21:12   ` [net-next PATCH 04/11] net: bulk alloc and reuse of SKBs in NAPI context Jesper Dangaard Brouer
2016-02-03  0:52     ` Alexei Starovoitov
2016-02-03 10:38       ` Jesper Dangaard Brouer
2016-02-02 21:12   ` [net-next PATCH 05/11] mlx5: use napi_*_skb APIs to get bulk alloc and free Jesper Dangaard Brouer
2016-02-02 21:13   ` [net-next PATCH 06/11] RFC: mlx5: RX bulking or bundling of packets before calling network stack Jesper Dangaard Brouer
2016-02-09 11:57     ` Saeed Mahameed
2016-02-10 20:26       ` Jesper Dangaard Brouer
2016-02-16  0:01         ` Saeed Mahameed
2016-02-02 21:13   ` [net-next PATCH 07/11] net: introduce napi_alloc_skb_hint() for more use-cases Jesper Dangaard Brouer
2016-02-02 22:29     ` kbuild test robot
2016-02-02 21:14   ` [net-next PATCH 08/11] mlx5: hint the NAPI alloc skb API about the expected bulk size Jesper Dangaard Brouer
2016-02-02 21:14   ` [net-next PATCH 09/11] RFC: dummy: bulk free SKBs Jesper Dangaard Brouer
2016-02-02 21:15   ` [net-next PATCH 10/11] RFC: net: API for RX handover of multiple SKBs to stack Jesper Dangaard Brouer
2016-02-02 21:15   ` [net-next PATCH 11/11] RFC: net: RPS bulk enqueue to backlog Jesper Dangaard Brouer
2016-02-07 19:25   ` [net-next PATCH 00/11] net: mitigating kmem_cache slowpath and BoF discussion patches David Miller
2016-02-08 12:14     ` [net-next PATCH V2 0/3] net: mitigating kmem_cache free slowpath Jesper Dangaard Brouer
2016-02-08 12:14       ` Jesper Dangaard Brouer
2016-02-08 12:14       ` Jesper Dangaard Brouer [this message]
2016-02-08 12:15       ` [net-next PATCH V2 2/3] net: bulk free SKBs that were delay free'ed due to IRQ context Jesper Dangaard Brouer
2016-02-08 12:15       ` [net-next PATCH V2 3/3] ixgbe: bulk free SKBs during TX completion cleanup cycle Jesper Dangaard Brouer
2016-02-11 16:59       ` [net-next PATCH V2 0/3] net: mitigating kmem_cache free slowpath David Miller
2016-02-13 11:12       ` Tilman Schmidt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160208121459.8860.85632.stgit@localhost \
    --to=brouer@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.duyck@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=cl@linux.com \
    --cc=davem@davemloft.net \
    --cc=jeffrey.t.kirsher@intel.com \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=tom@herbertland.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.