From: Edward Cree <ecree@solarflare.com>
To: <linux-net-drivers@solarflare.com>, <netdev@vger.kernel.org>
Cc: <davem@davemloft.net>
Subject: [RFC PATCH v2 net-next 10/12] net: listify Generic XDP processing, part 1
Date: Tue, 26 Jun 2018 19:21:41 +0100
Message-ID: <6ca465cf-c070-d4fe-73a8-b3bd8726526d@solarflare.com>
In-Reply-To: <fa3d7e58-e7b6-ad0c-619f-824c25ed0d97@solarflare.com>

Deals with all the pre- and post-amble to the BPF program itself, which is
still called one packet at a time.
Involves some fiddly percpu variables to cope with XDP_REDIRECT handling:
several program runs now happen before any verdict is acted on, so the
redirect_info that the bpf_redirect() helper writes is snapshotted per
packet and restored just before the redirect is performed.
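
A minimal userspace sketch of that snapshot/restore pattern (illustration
only, not kernel code; 'scratch' and 'run_prog' are invented stand-ins for
the percpu redirect_info and the BPF program run):

	#include <stdio.h>

	struct redirect_info { unsigned int ifindex; };

	static struct redirect_info scratch;	/* the shared scratch area */

	static unsigned int run_prog(unsigned int pkt)
	{
		scratch.ifindex = pkt + 1;	/* helper side effect */
		return 4;	/* pretend every verdict is XDP_REDIRECT */
	}

	int main(void)
	{
		struct { unsigned int ret; struct redirect_info ri; } work[3];
		unsigned int i;

		/* phase 1: run all the programs; each run may clobber
		 * scratch, so snapshot it per packet
		 */
		for (i = 0; i < 3; i++) {
			work[i].ret = run_prog(i);
			work[i].ri = scratch;
		}
		/* phase 2: act on the verdicts, restoring each packet's
		 * snapshot before its redirect is performed
		 */
		for (i = 0; i < 3; i++) {
			scratch = work[i].ri;
			printf("pkt %u: verdict %u, ifindex %u\n",
			       i, work[i].ret, scratch.ifindex);
		}
		return 0;
	}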

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 include/linux/filter.h |  10 +++
 net/core/dev.c         | 165 +++++++++++++++++++++++++++++++++++++++++++------
 net/core/filter.c      |  10 +--
 3 files changed, 156 insertions(+), 29 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20f2659dd829..75db6cbf78a3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -820,6 +820,16 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
 	return 0;
 }
 
+struct redirect_info {
+	u32 ifindex;
+	u32 flags;
+	struct bpf_map *map;
+	struct bpf_map *map_to_flush;
+	unsigned long   map_owner;
+};
+
+DECLARE_PER_CPU(struct redirect_info, redirect_info);
+
 /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
  * same cpu context. Further for best results no more than a single map
  * for the do_redirect/do_flush pair should be used. This limitation is
diff --git a/net/core/dev.c b/net/core/dev.c
index 11f80d4502b9..22cbd5314d56 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4015,15 +4015,14 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
 	return rxqueue;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
-				     struct xdp_buff *xdp,
-				     struct bpf_prog *xdp_prog)
+static u32 netif_receive_generic_xdp_prepare(struct sk_buff *skb,
+					     struct xdp_buff *xdp,
+					     void **orig_data,
+					     void **orig_data_end,
+					     u32 *mac_len)
 {
 	struct netdev_rx_queue *rxqueue;
-	void *orig_data, *orig_data_end;
-	u32 metalen, act = XDP_DROP;
-	int hlen, off;
-	u32 mac_len;
+	int hlen;
 
 	/* Reinjected packets coming from act_mirred or similar should
 	 * not get XDP generic processing.
@@ -4054,19 +4053,35 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	/* The XDP program wants to see the packet starting at the MAC
 	 * header.
 	 */
-	mac_len = skb->data - skb_mac_header(skb);
-	hlen = skb_headlen(skb) + mac_len;
-	xdp->data = skb->data - mac_len;
+	*mac_len = skb->data - skb_mac_header(skb);
+	hlen = skb_headlen(skb) + *mac_len;
+	xdp->data = skb->data - *mac_len;
 	xdp->data_meta = xdp->data;
 	xdp->data_end = xdp->data + hlen;
 	xdp->data_hard_start = skb->data - skb_headroom(skb);
-	orig_data_end = xdp->data_end;
-	orig_data = xdp->data;
+	*orig_data_end = xdp->data_end;
+	*orig_data = xdp->data;
 
 	rxqueue = netif_get_rxqueue(skb);
 	xdp->rxq = &rxqueue->xdp_rxq;
+	/* zero is actually XDP_ABORTED, but here we use it to mean "go
+	 * ahead and run the xdp program"
+	 */
+	return 0;
+do_drop:
+	kfree_skb(skb);
+	return XDP_DROP;
+}
 
-	act = bpf_prog_run_xdp(xdp_prog, xdp);
+static u32 netif_receive_generic_xdp_finish(struct sk_buff *skb,
+					    struct xdp_buff *xdp,
+					    struct bpf_prog *xdp_prog,
+					    void *orig_data,
+					    void *orig_data_end,
+					    u32 act, u32 mac_len)
+{
+	u32 metalen;
+	int off;
 
 	off = xdp->data - orig_data;
 	if (off > 0)
@@ -4082,7 +4097,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	if (off != 0) {
 		skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
 		skb->len -= off;
-
 	}
 
 	switch (act) {
@@ -4102,7 +4116,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		trace_xdp_exception(skb->dev, xdp_prog, act);
 		/* fall through */
 	case XDP_DROP:
-	do_drop:
 		kfree_skb(skb);
 		break;
 	}
@@ -4110,6 +4123,23 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	return act;
 }
 
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct xdp_buff *xdp,
+				     struct bpf_prog *xdp_prog)
+{
+	void *orig_data, *orig_data_end;
+	u32 act, mac_len;
+
+	act = netif_receive_generic_xdp_prepare(skb, xdp, &orig_data,
+						&orig_data_end, &mac_len);
+	if (act)
+		return act;
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	return netif_receive_generic_xdp_finish(skb, xdp, xdp_prog,
+						orig_data, orig_data_end, act,
+						mac_len);
+}
+
 /* When doing generic XDP we have to bypass the qdisc layer and the
  * network taps in order to match in-driver-XDP behavior.
  */
@@ -4168,6 +4198,93 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(do_xdp_generic);
 
+struct bpf_work {
+	struct list_head list;
+	void *ctx;
+	struct redirect_info ri;
+	unsigned long ret;
+};
+
+struct xdp_work {
+	struct bpf_work w;
+	struct xdp_buff xdp;
+	struct sk_buff *skb;
+	void *orig_data;
+	void *orig_data_end;
+	u32 mac_len;
+};
+
+/* Storage area for per-packet Generic XDP metadata */
+static DEFINE_PER_CPU(struct xdp_work[NAPI_POLL_WEIGHT], xdp_work);
+
+static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
+				struct sk_buff_head *list,
+				struct sk_buff_head *pass_list)
+{
+	struct xdp_work (*xwa)[NAPI_POLL_WEIGHT], *xw;
+	struct bpf_work *bw;
+	struct sk_buff *skb;
+	LIST_HEAD(xdp_list);
+	int n = 0, i, err;
+	u32 act;
+
+	if (!xdp_prog) {
+		/* PASS everything */
+		skb_queue_splice_init(list, pass_list);
+		return;
+	}
+
+	xwa = this_cpu_ptr(&xdp_work);
+
+	skb_queue_for_each(skb, list) {
+		if (WARN_ON(n >= NAPI_POLL_WEIGHT))
+			/* checked in caller, can't happen */
+			return;
+		xw = (*xwa) + n++;
+		memset(xw, 0, sizeof(*xw));
+		xw->skb = skb;
+		xw->w.ctx = &xw->xdp;
+		act = netif_receive_generic_xdp_prepare(skb, &xw->xdp,
+							&xw->orig_data,
+							&xw->orig_data_end,
+							&xw->mac_len);
+		if (act)
+			xw->w.ret = act;
+		else
+			list_add_tail(&xw->w.list, &xdp_list);
+	}
+
+	list_for_each_entry(bw, &xdp_list, list) {
+		bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
+		bw->ri = *this_cpu_ptr(&redirect_info);
+	}
+
+	for (i = 0; i < n; i++) {
+		xw = (*xwa) + i;
+		act = netif_receive_generic_xdp_finish(xw->skb, &xw->xdp,
+						       xdp_prog, xw->orig_data,
+						       xw->orig_data_end,
+						       xw->w.ret, xw->mac_len);
+		if (act != XDP_PASS) {
+			switch (act) {
+			case XDP_REDIRECT:
+				*this_cpu_ptr(&redirect_info) = xw->w.ri;
+				err = xdp_do_generic_redirect(xw->skb->dev,
+							      xw->skb, &xw->xdp,
+							      xdp_prog);
+				if (err) /* free and drop */
+					kfree_skb(xw->skb);
+				break;
+			case XDP_TX:
+				generic_xdp_tx(xw->skb, xdp_prog);
+				break;
+			}
+		} else {
+			__skb_queue_tail(pass_list, xw->skb);
+		}
+	}
+}
+
 static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
@@ -4878,7 +4995,7 @@ static void netif_receive_skb_list_internal(struct sk_buff_head *list)
 {
 	/* Two sublists so we can go back and forth between them */
 	struct sk_buff_head sublist, sublist2;
-	struct bpf_prog *xdp_prog = NULL;
+	struct bpf_prog *xdp_prog = NULL, *curr_prog = NULL;
 	struct sk_buff *skb;
 
 	__skb_queue_head_init(&sublist);
@@ -4893,15 +5010,23 @@ static void netif_receive_skb_list_internal(struct sk_buff_head *list)
 
 	__skb_queue_head_init(&sublist2);
 	if (static_branch_unlikely(&generic_xdp_needed_key)) {
+		struct sk_buff_head sublist3;
+		int n = 0;
+
+		__skb_queue_head_init(&sublist3);
 		preempt_disable();
 		rcu_read_lock();
 		while ((skb = __skb_dequeue(&sublist)) != NULL) {
 			xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-			if (do_xdp_generic(xdp_prog, skb) != XDP_PASS)
-				/* Dropped, don't add to sublist */
-				continue;
-			__skb_queue_tail(&sublist2, skb);
+			if (++n >= NAPI_POLL_WEIGHT || xdp_prog != curr_prog) {
+				do_xdp_list_generic(curr_prog, &sublist3, &sublist2);
+				__skb_queue_head_init(&sublist3);
+				n = 0;
+				curr_prog = xdp_prog;
+			}
+			__skb_queue_tail(&sublist3, skb);
 		}
+		do_xdp_list_generic(curr_prog, &sublist3, &sublist2);
 		rcu_read_unlock();
 		preempt_enable();
 		/* Move all packets onto first sublist */
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..c96aff14d76a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2039,15 +2039,7 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
-struct redirect_info {
-	u32 ifindex;
-	u32 flags;
-	struct bpf_map *map;
-	struct bpf_map *map_to_flush;
-	unsigned long   map_owner;
-};
-
-static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+DEFINE_PER_CPU(struct redirect_info, redirect_info);
 
 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 {
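
On the batching boundary in the netif_receive_skb_list_internal() hunk
above: packets accumulate on sublist3 until either NAPI_POLL_WEIGHT is
reached (so the percpu xdp_work scratch array cannot overflow) or the
skb's device has a different xdp_prog, and the pending batch is then
flushed through do_xdp_list_generic().  A standalone sketch of just that
flush condition, with toy values standing in for real programs and
packets:

	#include <stdio.h>

	#define NAPI_POLL_WEIGHT 4	/* really 64; shrunk for illustration */

	int main(void)
	{
		/* per-packet XDP program, modelled as a small integer; a
		 * change of program forces a flush so that each batch runs
		 * under a single program
		 */
		int progs[7] = { 1, 1, 1, 1, 1, 2, 2 };
		int curr = 0, n = 0, i;

		for (i = 0; i < 7; i++) {
			if (++n >= NAPI_POLL_WEIGHT || progs[i] != curr) {
				/* the very first flush runs on an empty
				 * batch, which do_xdp_list_generic() (like
				 * this sketch) tolerates
				 */
				printf("flush batch under prog %d\n", curr);
				n = 0;
				curr = progs[i];
			}
			/* queue packet i onto the pending batch */
		}
		printf("flush final batch under prog %d\n", curr);
		return 0;
	}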

Thread overview: 21+ messages
2018-06-26 18:15 [RFC PATCH v2 net-next 00/12] Handle multiple received packets at each stage Edward Cree
2018-06-26 18:17 ` [RFC PATCH v2 net-next 01/12] net: core: trivial netif_receive_skb_list() entry point Edward Cree
2018-06-27  0:06   ` Eric Dumazet
2018-06-27 14:03     ` Edward Cree
2018-06-26 18:17 ` [RFC PATCH v2 net-next 02/12] sfc: batch up RX delivery Edward Cree
2018-06-26 18:18 ` [RFC PATCH v2 net-next 03/12] net: core: unwrap skb list receive slightly further Edward Cree
2018-06-26 18:18 ` [RFC PATCH v2 net-next 04/12] net: core: Another step of skb receive list processing Edward Cree
2018-06-26 18:19 ` [RFC PATCH v2 net-next 05/12] net: core: another layer of lists, around PF_MEMALLOC skb handling Edward Cree
2018-06-26 18:19 ` [RFC PATCH v2 net-next 06/12] net: core: propagate SKB lists through packet_type lookup Edward Cree
2018-06-27 14:36   ` Willem de Bruijn
2018-06-27 14:49     ` Edward Cree
2018-06-27 16:00       ` Willem de Bruijn
2018-06-27 16:34         ` Edward Cree
2018-06-26 18:20 ` [RFC PATCH v2 net-next 07/12] net: ipv4: listified version of ip_rcv Edward Cree
2018-06-27 12:32   ` Florian Westphal
2018-06-26 18:20 ` [RFC PATCH v2 net-next 08/12] net: ipv4: listify ip_rcv_finish Edward Cree
2018-06-26 18:21 ` [RFC PATCH v2 net-next 09/12] net: don't bother calling list RX functions on empty lists Edward Cree
2018-06-26 18:21 ` Edward Cree [this message]
2018-06-26 18:22 ` [RFC PATCH v2 net-next 11/12] net: listify Generic XDP processing, part 2 Edward Cree
2018-06-26 18:22 ` [RFC PATCH v2 net-next 12/12] net: listify jited Generic XDP processing on x86_64 Edward Cree
2018-06-26 20:48 ` [RFC PATCH v2 net-next 00/12] Handle multiple received packets at each stage Tom Herbert
