From: Jesper Dangaard Brouer <brouer@redhat.com>
To: netdev@vger.kernel.org, Björn Töpel <bjorn.topel@intel.com>,
	magnus.karlsson@intel.com
Cc: eugenia@mellanox.com, Jason Wang <jasowang@redhat.com>,
	John Fastabend <john.fastabend@gmail.com>,
	Eran Ben Elisha <eranbe@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	galp@mellanox.com, Jesper Dangaard Brouer <brouer@redhat.com>,
	Daniel Borkmann <borkmann@iogearbox.net>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	Tariq Toukan <tariqt@mellanox.com>
Subject: [bpf-next V3 PATCH 02/15] xdp: introduce xdp_return_frame API and use in cpumap
Date: Fri, 09 Mar 2018 21:55:11 +0100
Message-ID: <152062891186.27458.6614524717564664091.stgit@firesoul>
In-Reply-To: <152062887576.27458.8590966896888512270.stgit@firesoul>

Introduce an xdp_return_frame API, and convert over cpumap as
the first user, given it has a queued XDP frame structure to leverage.
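
To illustrate the intended usage, below is a minimal sketch of how a
driver could register a memory model and later return a frame. The
driver struct and function names are hypothetical; only
xdp_rxq_info_reg_mem_model() and xdp_return_frame() come from this
patch.

	/* Hypothetical driver RX-ring setup: after registering the
	 * rxq info, declare which memory model the RX-ring uses.
	 */
	static int mydrv_setup_xdp_rxq(struct mydrv_rx_ring *ring,
				       struct net_device *dev, u32 qidx)
	{
		int err;

		err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, qidx);
		if (err)
			return err;

		/* Split-page refcnt based model; the allocator id is
		 * added later in this patchset, so pass NULL for now.
		 */
		return xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						  MEM_TYPE_PAGE_SHARED,
						  NULL);
	}

	/* When a queued/redirected frame must be freed outside the
	 * original driver context, return it via the mem info that was
	 * recorded at RX time.
	 */
	static void mydrv_free_frame(void *data, struct xdp_mem_info *mem)
	{
		xdp_return_frame(data, mem);
	}

A MEM_TYPE_PAGE_ORDER0 frame is returned the same way; the put_page()
happens inside xdp_return_frame().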

V3: Clean up and remove C99-style comments, as pointed out by Alex Duyck.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/net/xdp.h   |   28 ++++++++++++++++++++++++
 kernel/bpf/cpumap.c |   60 +++++++++++++++++++++++++++++++--------------------
 net/core/xdp.c      |   18 +++++++++++++++
 3 files changed, 82 insertions(+), 24 deletions(-)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index b2362ddfa694..15b546325e31 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -33,16 +33,44 @@
  * also mandatory during RX-ring setup.
  */
 
+enum mem_type {
+	MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
+	MEM_TYPE_PAGE_ORDER0,     /* Orig XDP full page model */
+	MEM_TYPE_MAX,
+};
+
+struct xdp_mem_info {
+	u32 type; /* enum mem_type, but known size type */
+	/* u32 id; will be added later in this patchset */
+};
+
 struct xdp_rxq_info {
 	struct net_device *dev;
 	u32 queue_index;
 	u32 reg_state;
+	struct xdp_mem_info mem;
 } ____cacheline_aligned; /* perf critical, avoid false-sharing */
 
+
+static inline
+void xdp_return_frame(void *data, struct xdp_mem_info *mem)
+{
+	if (mem->type == MEM_TYPE_PAGE_SHARED)
+		page_frag_free(data);
+
+	if (mem->type == MEM_TYPE_PAGE_ORDER0) {
+		struct page *page = virt_to_page(data); /* Assumes order0 page */
+
+		put_page(page);
+	}
+}
+
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
 		     struct net_device *dev, u32 queue_index);
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+			       enum mem_type type, void *allocator);
 
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index a4bb0b34375a..3e4bbcbe3e86 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -19,6 +19,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ptr_ring.h>
+#include <net/xdp.h>
 
 #include <linux/sched.h>
 #include <linux/workqueue.h>
@@ -137,27 +138,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
-static void __cpu_map_queue_destructor(void *ptr)
-{
-	/* The tear-down procedure should have made sure that queue is
-	 * empty.  See __cpu_map_entry_replace() and work-queue
-	 * invoked cpu_map_kthread_stop(). Catch any broken behaviour
-	 * gracefully and warn once.
-	 */
-	if (WARN_ON_ONCE(ptr))
-		page_frag_free(ptr);
-}
-
-static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
-{
-	if (atomic_dec_and_test(&rcpu->refcnt)) {
-		/* The queue should be empty at this point */
-		ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor);
-		kfree(rcpu->queue);
-		kfree(rcpu);
-	}
-}
-
 static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 {
 	atomic_inc(&rcpu->refcnt);
@@ -188,6 +168,10 @@ struct xdp_pkt {
 	u16 len;
 	u16 headroom;
 	u16 metasize;
+	/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
+	 * while mem info is valid on remote CPU.
+	 */
+	struct xdp_mem_info mem;
 	struct net_device *dev_rx;
 };
 
@@ -213,6 +197,9 @@ static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
 	xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
 	xdp_pkt->metasize = metasize;
 
+	/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
+	xdp_pkt->mem = xdp->rxq->mem;
+
 	return xdp_pkt;
 }
 
@@ -265,6 +252,31 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 	return skb;
 }
 
+static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
+{
+	/* The tear-down procedure should have made sure that queue is
+	 * empty.  See __cpu_map_entry_replace() and work-queue
+	 * invoked cpu_map_kthread_stop(). Catch any broken behaviour
+	 * gracefully and warn once.
+	 */
+	struct xdp_pkt *xdp_pkt;
+
+	while ((xdp_pkt = ptr_ring_consume(ring)))
+		if (WARN_ON_ONCE(xdp_pkt))
+			xdp_return_frame(xdp_pkt, &xdp_pkt->mem);
+}
+
+static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+{
+	if (atomic_dec_and_test(&rcpu->refcnt)) {
+		/* The queue should be empty at this point */
+		__cpu_map_ring_cleanup(rcpu->queue);
+		ptr_ring_cleanup(rcpu->queue, NULL);
+		kfree(rcpu->queue);
+		kfree(rcpu);
+	}
+}
+
 static int cpu_map_kthread_run(void *data)
 {
 	struct bpf_cpu_map_entry *rcpu = data;
@@ -307,7 +319,7 @@ static int cpu_map_kthread_run(void *data)
 
 			skb = cpu_map_build_skb(rcpu, xdp_pkt);
 			if (!skb) {
-				page_frag_free(xdp_pkt);
+				xdp_return_frame(xdp_pkt, &xdp_pkt->mem);
 				continue;
 			}
 
@@ -604,13 +616,13 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 	spin_lock(&q->producer_lock);
 
 	for (i = 0; i < bq->count; i++) {
-		void *xdp_pkt = bq->q[i];
+		struct xdp_pkt *xdp_pkt = bq->q[i];
 		int err;
 
 		err = __ptr_ring_produce(q, xdp_pkt);
 		if (err) {
 			drops++;
-			page_frag_free(xdp_pkt); /* Free xdp_pkt */
+			xdp_return_frame(xdp_pkt->data, &xdp_pkt->mem);
 		}
 		processed++;
 	}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 097a0f74e004..9eee0c431126 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -71,3 +71,21 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
 	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
+
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+			       enum mem_type type, void *allocator)
+{
+	if (type >= MEM_TYPE_MAX)
+		return -EINVAL;
+
+	xdp_rxq->mem.type = type;
+
+	if (allocator)
+		return -EOPNOTSUPP;
+
+	/* TODO: Allocate an ID that maps to allocator pointer
+	 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);


Thread overview: 25+ messages
2018-03-09 20:55 [bpf-next V3 PATCH 00/15] XDP redirect memory return API Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 01/15] mlx5: basic XDP_REDIRECT forward support Jesper Dangaard Brouer
2018-03-09 20:55 ` Jesper Dangaard Brouer [this message]
2018-03-09 20:55 ` [bpf-next V3 PATCH 03/15] ixgbe: use xdp_return_frame API Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 04/15] xdp: move struct xdp_buff from filter.h to xdp.h Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 05/15] xdp: introduce a new xdp_frame type Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 06/15] tun: convert to use generic xdp_frame and xdp_return_frame API Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 07/15] virtio_net: " Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 08/15] bpf: cpumap convert to use generic xdp_frame Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 09/15] mlx5: register a memory model when XDP is enabled Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 10/15] xdp: rhashtable with allocator ID to pointer mapping Jesper Dangaard Brouer
2018-03-09 20:55 ` [bpf-next V3 PATCH 11/15] page_pool: refurbish version of page_pool code Jesper Dangaard Brouer
2018-03-09 20:56 ` [bpf-next V3 PATCH 12/15] xdp: allow page_pool as an allocator type in xdp_return_frame Jesper Dangaard Brouer
2018-03-09 20:56 ` [bpf-next V3 PATCH 13/15] mlx5: use page_pool for xdp_return_frame call Jesper Dangaard Brouer
2018-03-12 10:08   ` Tariq Toukan
2018-03-12 10:16     ` Tariq Toukan
2018-03-12 13:20       ` Tariq Toukan
2018-03-19 13:12         ` Jesper Dangaard Brouer
2018-03-20  7:43           ` Tariq Toukan
2018-03-20  8:18             ` Tariq Toukan
2018-03-09 20:56 ` [bpf-next V3 PATCH 14/15] xdp: transition into using xdp_frame for return API Jesper Dangaard Brouer
2018-03-09 20:56 ` [bpf-next V3 PATCH 15/15] xdp: transition into using xdp_frame for ndo_xdp_xmit Jesper Dangaard Brouer
2018-03-16  9:04 ` [bpf-next V3 PATCH 00/15] XDP redirect memory return API Jason Wang
2018-03-19 10:10   ` Jesper Dangaard Brouer
2018-03-20  2:28     ` Jason Wang
