All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 net-next RFC] Generic XDP
@ 2017-04-09 20:35 David Miller
  2017-04-10  2:18 ` Alexei Starovoitov
                   ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: David Miller @ 2017-04-09 20:35 UTC (permalink / raw)
  To: netdev; +Cc: xdp-newbies


This provides a generic non-optimized XDP implementation when the
device driver does not provide an optimized one.

It is arguable that perhaps I should have required something like
this as part of the initial XDP feature merge.

I believe this is critical for two reasons:

1) Accessibility.  More people can play with XDP with less
   dependencies.  Yes I know we have XDP support in virtio_net, but
   that just creates another dependency for learning how to use this
   facility.

   I wrote this to make life easier for the XDP newbies.

2) As a model for what the expected semantics are.  If there is a pure
   generic core implementation, it serves as a semantic example for
   driver folks adding XDP support.

This is just a rough draft and is untested.

Signed-off-by: David S. Miller <davem@davemloft.net>
---

v2:
 - Add some "fall through" comments in switch statements based
   upon feedback from Andrew Lunn
 - Use RCU for generic xdp_prog, thanks to Johannes Berg.

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc07c3b..f8ff49c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1892,6 +1892,7 @@ struct net_device {
 	struct lock_class_key	*qdisc_tx_busylock;
 	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
+	struct bpf_prog __rcu	*xdp_prog;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 7efb417..ad6de2d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -95,6 +95,7 @@
 #include <linux/notifier.h>
 #include <linux/skbuff.h>
 #include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/busy_poll.h>
@@ -4247,6 +4248,83 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	return ret;
 }
 
+static struct static_key generic_xdp_needed __read_mostly;
+
+static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	struct bpf_prog *new = xdp->prog;
+	int ret = 0;
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG: {
+		struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
+
+		rcu_assign_pointer(dev->xdp_prog, new);
+		if (old)
+			bpf_prog_put(old);
+
+		if (old && !new)
+			static_key_slow_dec(&generic_xdp_needed);
+		else if (new && !old)
+			static_key_slow_inc(&generic_xdp_needed);
+		break;
+	}
+
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct bpf_prog *xdp_prog)
+{
+	struct xdp_buff xdp;
+	u32 act = XDP_DROP;
+	void *orig_data;
+	int hlen, off;
+
+	if (skb_linearize(skb))
+		goto do_drop;
+
+	hlen = skb_headlen(skb);
+	xdp.data = skb->data;
+	xdp.data_end = xdp.data + hlen;
+	xdp.data_hard_start = xdp.data - skb_headroom(skb);
+	orig_data = xdp.data;
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+	off = xdp.data - orig_data;
+	if (off)
+		__skb_push(skb, off);
+
+	switch (act) {
+	case XDP_PASS:
+	case XDP_TX:
+		break;
+
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(skb->dev, xdp_prog, act);
+		/* fall through */
+	case XDP_DROP:
+	do_drop:
+		kfree_skb(skb);
+		break;
+	}
+
+	return act;
+}
+
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	int ret;
@@ -4258,6 +4336,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 
 	rcu_read_lock();
 
+	if (static_key_false(&generic_xdp_needed)) {
+		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+
+		if (xdp_prog) {
+			u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+
+			if (act != XDP_PASS) {
+				rcu_read_unlock();
+				if (act == XDP_TX)
+					dev_queue_xmit(skb);
+				return NET_RX_DROP;
+			}
+		}
+	}
+
 #ifdef CONFIG_RPS
 	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -6718,6 +6811,7 @@ EXPORT_SYMBOL(dev_change_proto_down);
  */
 int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 {
+	int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
 	const struct net_device_ops *ops = dev->netdev_ops;
 	struct bpf_prog *prog = NULL;
 	struct netdev_xdp xdp;
@@ -6725,14 +6819,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 
 	ASSERT_RTNL();
 
-	if (!ops->ndo_xdp)
-		return -EOPNOTSUPP;
+	xdp_op = ops->ndo_xdp;
+	if (!xdp_op)
+		xdp_op = generic_xdp_install;
+
 	if (fd >= 0) {
 		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
 			memset(&xdp, 0, sizeof(xdp));
 			xdp.command = XDP_QUERY_PROG;
 
-			err = ops->ndo_xdp(dev, &xdp);
+			err = xdp_op(dev, &xdp);
 			if (err < 0)
 				return err;
 			if (xdp.prog_attached)
@@ -6748,7 +6844,7 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 	xdp.command = XDP_SETUP_PROG;
 	xdp.prog = prog;
 
-	err = ops->ndo_xdp(dev, &xdp);
+	err = xdp_op(dev, &xdp);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
 
@@ -7789,6 +7885,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
 void free_netdev(struct net_device *dev)
 {
 	struct napi_struct *p, *n;
+	struct bpf_prog *prog;
 
 	might_sleep();
 	netif_free_tx_queues(dev);
@@ -7807,6 +7904,12 @@ void free_netdev(struct net_device *dev)
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
 
+	prog = rcu_dereference(dev->xdp_prog);
+	if (prog) {
+		bpf_prog_put(prog);
+		static_key_slow_dec(&generic_xdp_needed);
+	}
+
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		netdev_freemem(dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b2bd4c9..c73c7b7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -896,15 +896,12 @@ static size_t rtnl_port_size(const struct net_device *dev,
 		return port_self_size;
 }
 
-static size_t rtnl_xdp_size(const struct net_device *dev)
+static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
 			  nla_total_size(1);	/* XDP_ATTACHED */
 
-	if (!dev->netdev_ops->ndo_xdp)
-		return 0;
-	else
-		return xdp_size;
+	return xdp_size;
 }
 
 static noinline size_t if_nlmsg_size(const struct net_device *dev,
@@ -943,7 +940,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
 	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
-	       + rtnl_xdp_size(dev) /* IFLA_XDP */
+	       + rtnl_xdp_size() /* IFLA_XDP */
 	       + nla_total_size(4)  /* IFLA_EVENT */
 	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
 
@@ -1252,20 +1249,25 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
-	struct netdev_xdp xdp_op = {};
 	struct nlattr *xdp;
 	int err;
+	u8 val;
 
-	if (!dev->netdev_ops->ndo_xdp)
-		return 0;
 	xdp = nla_nest_start(skb, IFLA_XDP);
 	if (!xdp)
 		return -EMSGSIZE;
-	xdp_op.command = XDP_QUERY_PROG;
-	err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
-	if (err)
-		goto err_cancel;
-	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
+	if (dev->netdev_ops->ndo_xdp) {
+		struct netdev_xdp xdp_op = {};
+
+		xdp_op.command = XDP_QUERY_PROG;
+		err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
+		if (err)
+			goto err_cancel;
+		val = xdp_op.prog_attached;
+	} else {
+		val = rcu_access_pointer(dev->xdp_prog) ? 1 : 0;
+	}
+	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
 	if (err)
 		goto err_cancel;
 

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-09 20:35 [PATCH v2 net-next RFC] Generic XDP David Miller
@ 2017-04-10  2:18 ` Alexei Starovoitov
  2017-04-10 16:57   ` Willem de Bruijn
                     ` (2 more replies)
  2017-04-10 18:39 ` Andy Gospodarek
                   ` (3 subsequent siblings)
  4 siblings, 3 replies; 19+ messages in thread
From: Alexei Starovoitov @ 2017-04-10  2:18 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, xdp-newbies

On Sun, Apr 09, 2017 at 01:35:28PM -0700, David Miller wrote:
> 
> This provides a generic non-optimized XDP implementation when the
> device driver does not provide an optimized one.
> 
> It is arguable that perhaps I should have required something like
> this as part of the initial XDP feature merge.
> 
> I believe this is critical for two reasons:
> 
> 1) Accessibility.  More people can play with XDP with less
>    dependencies.  Yes I know we have XDP support in virtio_net, but
>    that just creates another depedency for learning how to use this
>    facility.
> 
>    I wrote this to make life easier for the XDP newbies.
> 
> 2) As a model for what the expected semantics are.  If there is a pure
>    generic core implementation, it serves as a semantic example for
>    driver folks adding XDP support.
> 
> This is just a rough draft and is untested.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>
...
> +static u32 netif_receive_generic_xdp(struct sk_buff *skb,
> +				     struct bpf_prog *xdp_prog)
> +{
> +	struct xdp_buff xdp;
> +	u32 act = XDP_DROP;
> +	void *orig_data;
> +	int hlen, off;
> +
> +	if (skb_linearize(skb))
> +		goto do_drop;

do we need to force disable gro ?
Otherwise if we linearize skb_is_gso packet it will be huge
and not similar to normal xdp packets?
gso probably needs to be disabled too to avoid veth surprises?

> +
> +	hlen = skb_headlen(skb);
> +	xdp.data = skb->data;

it probably should be
hlen = skb_headlen(skb) + skb->mac_len;
xdp.data = skb->data - skb->mac_len;
to make sure xdp program is looking at l2 header.

> +	xdp.data_end = xdp.data + hlen;
> +	xdp.data_hard_start = xdp.data - skb_headroom(skb);
> +	orig_data = xdp.data;
> +	act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +
> +	off = xdp.data - orig_data;
> +	if (off)
> +		__skb_push(skb, off);

and restore l2 back somehow and get new skb->protocol ?
if we simply do __skb_pull(skb, skb->mac_len); like
we do with cls_bpf, it will not work correctly,
since if the program did ip->ipip encap (like our balancer
does and the test tools/testing/selftests/bpf/test_xdp.c)
the skb metadata fields will be wrong.
So we need to repeat eth_type_trans() here if (xdp.data != orig_data)

In case of cls_bpf when we mess with skb sizes we always
adjust skb metafields in helpers, so there it's fine
and __skb_pull(skb, skb->mac_len); is enough.
Here we need to be a bit more careful.

>  static int netif_receive_skb_internal(struct sk_buff *skb)
>  {
>  	int ret;
> @@ -4258,6 +4336,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
>  
>  	rcu_read_lock();
>  
> +	if (static_key_false(&generic_xdp_needed)) {
> +		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
> +
> +		if (xdp_prog) {
> +			u32 act = netif_receive_generic_xdp(skb, xdp_prog);

That's indeed the best attachment point in the stack.
I was trying to see whether it can be lowered into something like
dev_gro_receive(), but not everyone calls it.
Another option to put it into eth_type_trans() itself, then
there are no problems with gro, l2 headers, and adjust_head,
but changing all drivers is too much.

> +
> +			if (act != XDP_PASS) {
> +				rcu_read_unlock();
> +				if (act == XDP_TX)
> +					dev_queue_xmit(skb);

It should be fine. For cls_bpf we do recursion check __bpf_tx_skb()
but I forgot specific details. May be here it's fine as-is.
Daniel, do we need recursion check here?

> @@ -6725,14 +6819,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
>  
>  	ASSERT_RTNL();
>  
> -	if (!ops->ndo_xdp)
> -		return -EOPNOTSUPP;
> +	xdp_op = ops->ndo_xdp;
> +	if (!xdp_op)
> +		xdp_op = generic_xdp_install;

I suspect there always be drivers that don't support xdp (like e100),
so this generic_xdp_install() will stay with us forever.
Since it will stay, can we enable it for xdp enabled drivers too?
This will allow us to test both raw xdp and skb-based paths neck to neck.
Today bpf-newbies typically start developing with cls_bpf, since
it can run on tap/veth and then refactor the program to xdp.
Unfortunately cls_bpf and xdp programs are substantially different,
so it pretty much means that cls_bpf prog is a throw away.
If we can add a flag to this xdp netlink attach command
that says 'enable skb-based xdp', we'll have exactly the same
program running on raw dma buffer and on skb, which will help
developing on veth/tap and moving the same prog into physical
eth0 later. And users will be able to switch between skb-based
mode on eth0 and raw-buffer mode back and forth to see perf difference
(and hopefully nothing else).

Another advantage that it will help to flush out the differences
between skb- and raw- modes in the drivers that support xdp already.

Yet another benefit is it will allow measuring of cost of skb-alloc path.

Right now we have XDP_FLAGS_UPDATE_IF_NOEXIST flag.
We can add something like XDP_FLAGS_SKB_MODE flag for this purpose
and in the drivers that don't support XDP at the moment, this flag
will be assumed automatically.
Thoughts?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10  2:18 ` Alexei Starovoitov
@ 2017-04-10 16:57   ` Willem de Bruijn
  2017-04-10 19:33   ` David Miller
  2017-04-10 19:50   ` Daniel Borkmann
  2 siblings, 0 replies; 19+ messages in thread
From: Willem de Bruijn @ 2017-04-10 16:57 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David Miller, Network Development, xdp-newbies

>>  static int netif_receive_skb_internal(struct sk_buff *skb)
>>  {
>>       int ret;
>> @@ -4258,6 +4336,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
>>
>>       rcu_read_lock();
>>
>> +     if (static_key_false(&generic_xdp_needed)) {
>> +             struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
>> +
>> +             if (xdp_prog) {
>> +                     u32 act = netif_receive_generic_xdp(skb, xdp_prog);
>
> That's indeed the best attachment point in the stack.
> I was trying to see whether it can be lowered into something like
> dev_gro_receive(), but not everyone calls it.

It would be a helpful (follow-on) optimization for packets that do
pass through it. It allows skb recycling with napi_reuse_skb and can
be used to protect if a vulnerability in the gro stack pops up.

> Another option to put it into eth_type_trans() itself, then
> there are no problems with gro, l2 headers, and adjust_head,
> but changing all drivers is too much.
>
>> +
>> +                     if (act != XDP_PASS) {
>> +                             rcu_read_unlock();
>> +                             if (act == XDP_TX)
>> +                                     dev_queue_xmit(skb);
>
> It should be fine. For cls_bpf we do recursion check __bpf_tx_skb()
> but I forgot specific details. May be here it's fine as-is.
> Daniel, do we need recursion check here?

That limiter is for egress redirecting to egress, I believe. This
ingress to egress will go through netif_rx and a softirq if looping.

Another point on redirect is clearing skb state. queue_mapping and
sender_cpu will be dirty, but should be able to handle it. It seems
possible to attach to a virtual device, such as a tunnel. In that case
the packet may have gone through a complex receive path before
reaching the tunnel, including tc ingress, so even more skb fields may
be set (e.g., priority). The same holds for act_mirred or
__bpf_redirect, so I assume that this is safe.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-09 20:35 [PATCH v2 net-next RFC] Generic XDP David Miller
  2017-04-10  2:18 ` Alexei Starovoitov
@ 2017-04-10 18:39 ` Andy Gospodarek
  2017-04-10 19:28   ` David Miller
                     ` (2 more replies)
  2017-04-10 19:28 ` Stephen Hemminger
                   ` (2 subsequent siblings)
  4 siblings, 3 replies; 19+ messages in thread
From: Andy Gospodarek @ 2017-04-10 18:39 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, xdp-newbies

On Sun, Apr 09, 2017 at 01:35:28PM -0700, David Miller wrote:
> 
> This provides a generic non-optimized XDP implementation when the
> device driver does not provide an optimized one.
> 
> It is arguable that perhaps I should have required something like
> this as part of the initial XDP feature merge.
> 
> I believe this is critical for two reasons:
> 
> 1) Accessibility.  More people can play with XDP with less
>    dependencies.  Yes I know we have XDP support in virtio_net, but
>    that just creates another depedency for learning how to use this
>    facility.
> 
>    I wrote this to make life easier for the XDP newbies.
> 
> 2) As a model for what the expected semantics are.  If there is a pure
>    generic core implementation, it serves as a semantic example for
>    driver folks adding XDP support.
> 
> This is just a rough draft and is untested.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>

As promised, I did some testing today with bnxt_en's implementation of
XDP and this one.

I ran this on a desktop-class system I have (i7-6700 CPU @ 3.40GHz)
and used pktgen_sample03_burst_single_flow.sh from another system to
throw ~6.5Mpps as a single UDP stream towards the system running XDP.

I just commented out the ndo_xdp op in bnxt.c to test this patch.  The
differences were pretty dramatic.

Using the ndo_xdp ndo in bnxt_en, my perf report output looked like
this (not sure why X is still running on this system!):

    38.08%  swapper          [sysimgblt]                   [k] 0x0000000000005bd0
    11.80%  swapper          [kernel.vmlinux]              [k] intel_idle
    10.49%  swapper          [bnxt_en]                     [k] bnxt_rx_pkt
     6.31%  swapper          [bnxt_en]                     [k] bnxt_rx_xdp
     5.64%  swapper          [bnxt_en]                     [k] bnxt_poll
     4.22%  swapper          [kernel.vmlinux]              [k] poll_idle
     3.46%  swapper          [kernel.vmlinux]              [k] irq_entries_start
     2.95%  swapper          [kernel.vmlinux]              [k] napi_complete_done
     1.79%  swapper          [kernel.vmlinux]              [k] cpuidle_enter_state
     1.53%  swapper          [kernel.vmlinux]              [k] menu_select
     1.19%  swapper          [bnxt_en]                     [k] bnxt_reuse_rx_data
     1.00%  swapper          [sysimgblt]                   [k] 0x0000000000005c6f
     0.92%  swapper          [kernel.vmlinux]              [k] __next_timer_interrupt
     0.71%  swapper          [kernel.vmlinux]              [k] _raw_spin_lock_irqsave
     0.71%  swapper          [kernel.vmlinux]              [k] bpf_map_lookup_elem

mpstat reports that the CPU receiving and dropping the traffic is
basically running idle.  Dropping this amount of traffic in the driver
has very little impact on the system.

With v2 of this patch I see the following from perf report:

    19.69%  ksoftirqd/3      [kernel.vmlinux]              [k] memcpy_erms
    16.30%  ksoftirqd/3      [kernel.vmlinux]              [k] __bpf_prog_run
    10.11%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_rx_pkt
     7.69%  ksoftirqd/3      [kernel.vmlinux]              [k] __build_skb
     4.25%  ksoftirqd/3      [kernel.vmlinux]              [k] inet_gro_receive
     3.74%  ksoftirqd/3      [kernel.vmlinux]              [k] kmem_cache_alloc
     3.53%  ksoftirqd/3      [kernel.vmlinux]              [k] dev_gro_receive
     3.43%  ksoftirqd/3      [kernel.vmlinux]              [k] page_frag_free
     3.12%  ksoftirqd/3      [kernel.vmlinux]              [k] kmem_cache_free
     2.56%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_poll
     2.46%  ksoftirqd/3      [kernel.vmlinux]              [k] netif_receive_skb_internal
     2.13%  ksoftirqd/3      [kernel.vmlinux]              [k] __udp4_lib_lookup
     1.63%  ksoftirqd/3      [kernel.vmlinux]              [k] __napi_alloc_skb
     1.51%  ksoftirqd/3      [kernel.vmlinux]              [k] eth_type_trans
     1.42%  ksoftirqd/3      [kernel.vmlinux]              [k] udp_gro_receive
     1.29%  ksoftirqd/3      [kernel.vmlinux]              [k] napi_gro_receive
     1.25%  ksoftirqd/3      [kernel.vmlinux]              [k] udp4_gro_receive
     1.18%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_rx_xdp
     1.17%  ksoftirqd/3      [kernel.vmlinux]              [k] skb_release_data
     1.11%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_reuse_rx_data
     1.07%  ksoftirqd/3      [kernel.vmlinux]              [k] net_rx_action

mpstat reports that the CPU receiving and dropping the traffic is about
98% utilized running in softirq context.

Unsurprisingly, in-driver XDP is significantly more efficient than in-stack
XDP.

I also noted that...

xdp1 was not parsing the protocol correctly.   All traffic was showing up as
protocol '0' instead of '17' in the output.  

xdp2 was not actually sending the frames back out when using this patch.

I'll take a look in a bit and see if I can track these two issues down.

> ---
> 
> v2:
>  - Add some "fall through" comments in switch statements based
>    upon feedback from Andrew Lunn
>  - Use RCU for generic xdp_prog, thanks to Johannes Berg.
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index cc07c3b..f8ff49c 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1892,6 +1892,7 @@ struct net_device {
>  	struct lock_class_key	*qdisc_tx_busylock;
>  	struct lock_class_key	*qdisc_running_key;
>  	bool			proto_down;
> +	struct bpf_prog __rcu	*xdp_prog;
>  };
>  #define to_net_dev(d) container_of(d, struct net_device, dev)
>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 7efb417..ad6de2d 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -95,6 +95,7 @@
>  #include <linux/notifier.h>
>  #include <linux/skbuff.h>
>  #include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>  #include <net/net_namespace.h>
>  #include <net/sock.h>
>  #include <net/busy_poll.h>
> @@ -4247,6 +4248,83 @@ static int __netif_receive_skb(struct sk_buff *skb)
>  	return ret;
>  }
>  
> +static struct static_key generic_xdp_needed __read_mostly;
> +
> +static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
> +{
> +	struct bpf_prog *new = xdp->prog;
> +	int ret = 0;
> +
> +	switch (xdp->command) {
> +	case XDP_SETUP_PROG: {
> +		struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
> +
> +		rcu_assign_pointer(dev->xdp_prog, new);
> +		if (old)
> +			bpf_prog_put(old);
> +
> +		if (old && !new)
> +			static_key_slow_dec(&generic_xdp_needed);
> +		else if (new && !old)
> +			static_key_slow_inc(&generic_xdp_needed);
> +		break;
> +	}
> +
> +	case XDP_QUERY_PROG:
> +		xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
> +		break;
> +
> +	default:
> +		ret = -EINVAL;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +static u32 netif_receive_generic_xdp(struct sk_buff *skb,
> +				     struct bpf_prog *xdp_prog)
> +{
> +	struct xdp_buff xdp;
> +	u32 act = XDP_DROP;
> +	void *orig_data;
> +	int hlen, off;
> +
> +	if (skb_linearize(skb))
> +		goto do_drop;
> +
> +	hlen = skb_headlen(skb);
> +	xdp.data = skb->data;
> +	xdp.data_end = xdp.data + hlen;
> +	xdp.data_hard_start = xdp.data - skb_headroom(skb);
> +	orig_data = xdp.data;
> +
> +	act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +
> +	off = xdp.data - orig_data;
> +	if (off)
> +		__skb_push(skb, off);
> +
> +	switch (act) {
> +	case XDP_PASS:
> +	case XDP_TX:
> +		break;
> +
> +	default:
> +		bpf_warn_invalid_xdp_action(act);
> +		/* fall through */
> +	case XDP_ABORTED:
> +		trace_xdp_exception(skb->dev, xdp_prog, act);
> +		/* fall through */
> +	case XDP_DROP:
> +	do_drop:
> +		kfree_skb(skb);
> +		break;
> +	}
> +
> +	return act;
> +}
> +
>  static int netif_receive_skb_internal(struct sk_buff *skb)
>  {
>  	int ret;
> @@ -4258,6 +4336,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
>  
>  	rcu_read_lock();
>  
> +	if (static_key_false(&generic_xdp_needed)) {
> +		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
> +
> +		if (xdp_prog) {
> +			u32 act = netif_receive_generic_xdp(skb, xdp_prog);
> +
> +			if (act != XDP_PASS) {
> +				rcu_read_unlock();
> +				if (act == XDP_TX)
> +					dev_queue_xmit(skb);
> +				return NET_RX_DROP;
> +			}
> +		}
> +	}
> +
>  #ifdef CONFIG_RPS
>  	if (static_key_false(&rps_needed)) {
>  		struct rps_dev_flow voidflow, *rflow = &voidflow;
> @@ -6718,6 +6811,7 @@ EXPORT_SYMBOL(dev_change_proto_down);
>   */
>  int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
>  {
> +	int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
>  	const struct net_device_ops *ops = dev->netdev_ops;
>  	struct bpf_prog *prog = NULL;
>  	struct netdev_xdp xdp;
> @@ -6725,14 +6819,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
>  
>  	ASSERT_RTNL();
>  
> -	if (!ops->ndo_xdp)
> -		return -EOPNOTSUPP;
> +	xdp_op = ops->ndo_xdp;
> +	if (!xdp_op)
> +		xdp_op = generic_xdp_install;
> +
>  	if (fd >= 0) {
>  		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
>  			memset(&xdp, 0, sizeof(xdp));
>  			xdp.command = XDP_QUERY_PROG;
>  
> -			err = ops->ndo_xdp(dev, &xdp);
> +			err = xdp_op(dev, &xdp);
>  			if (err < 0)
>  				return err;
>  			if (xdp.prog_attached)
> @@ -6748,7 +6844,7 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
>  	xdp.command = XDP_SETUP_PROG;
>  	xdp.prog = prog;
>  
> -	err = ops->ndo_xdp(dev, &xdp);
> +	err = xdp_op(dev, &xdp);
>  	if (err < 0 && prog)
>  		bpf_prog_put(prog);
>  
> @@ -7789,6 +7885,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
>  void free_netdev(struct net_device *dev)
>  {
>  	struct napi_struct *p, *n;
> +	struct bpf_prog *prog;
>  
>  	might_sleep();
>  	netif_free_tx_queues(dev);
> @@ -7807,6 +7904,12 @@ void free_netdev(struct net_device *dev)
>  	free_percpu(dev->pcpu_refcnt);
>  	dev->pcpu_refcnt = NULL;
>  
> +	prog = rcu_dereference(dev->xdp_prog);
> +	if (prog) {
> +		bpf_prog_put(prog);
> +		static_key_slow_dec(&generic_xdp_needed);
> +	}
> +
>  	/*  Compatibility with error handling in drivers */
>  	if (dev->reg_state == NETREG_UNINITIALIZED) {
>  		netdev_freemem(dev);
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index b2bd4c9..c73c7b7 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -896,15 +896,12 @@ static size_t rtnl_port_size(const struct net_device *dev,
>  		return port_self_size;
>  }
>  
> -static size_t rtnl_xdp_size(const struct net_device *dev)
> +static size_t rtnl_xdp_size(void)
>  {
>  	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
>  			  nla_total_size(1);	/* XDP_ATTACHED */
>  
> -	if (!dev->netdev_ops->ndo_xdp)
> -		return 0;
> -	else
> -		return xdp_size;
> +	return xdp_size;
>  }
>  
>  static noinline size_t if_nlmsg_size(const struct net_device *dev,
> @@ -943,7 +940,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
>  	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
>  	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
>  	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
> -	       + rtnl_xdp_size(dev) /* IFLA_XDP */
> +	       + rtnl_xdp_size() /* IFLA_XDP */
>  	       + nla_total_size(4)  /* IFLA_EVENT */
>  	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
>  
> @@ -1252,20 +1249,25 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
>  
>  static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
>  {
> -	struct netdev_xdp xdp_op = {};
>  	struct nlattr *xdp;
>  	int err;
> +	u8 val;
>  
> -	if (!dev->netdev_ops->ndo_xdp)
> -		return 0;
>  	xdp = nla_nest_start(skb, IFLA_XDP);
>  	if (!xdp)
>  		return -EMSGSIZE;
> -	xdp_op.command = XDP_QUERY_PROG;
> -	err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
> -	if (err)
> -		goto err_cancel;
> -	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
> +	if (dev->netdev_ops->ndo_xdp) {
> +		struct netdev_xdp xdp_op = {};
> +
> +		xdp_op.command = XDP_QUERY_PROG;
> +		err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
> +		if (err)
> +			goto err_cancel;
> +		val = xdp_op.prog_attached;
> +	} else {
> +		val = rcu_access_pointer(dev->xdp_prog) ? 1 : 0;
> +	}
> +	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
>  	if (err)
>  		goto err_cancel;
>  

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-09 20:35 [PATCH v2 net-next RFC] Generic XDP David Miller
  2017-04-10  2:18 ` Alexei Starovoitov
  2017-04-10 18:39 ` Andy Gospodarek
@ 2017-04-10 19:28 ` Stephen Hemminger
  2017-04-10 21:08 ` Daniel Borkmann
  2017-04-11 16:28 ` Eric Dumazet
  4 siblings, 0 replies; 19+ messages in thread
From: Stephen Hemminger @ 2017-04-10 19:28 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, xdp-newbies

On Sun, 09 Apr 2017 13:35:28 -0700 (PDT)
David Miller <davem@davemloft.net> wrote:

> This provides a generic non-optimized XDP implementation when the
> device driver does not provide an optimized one.
> 
> It is arguable that perhaps I should have required something like
> this as part of the initial XDP feature merge.
> 
> I believe this is critical for two reasons:
> 
> 1) Accessibility.  More people can play with XDP with less
>    dependencies.  Yes I know we have XDP support in virtio_net, but
>    that just creates another depedency for learning how to use this
>    facility.
> 
>    I wrote this to make life easier for the XDP newbies.
> 
> 2) As a model for what the expected semantics are.  If there is a pure
>    generic core implementation, it serves as a semantic example for
>    driver folks adding XDP support.
> 
> This is just a rough draft and is untested.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>

Agreed, XDP really needs this

> v2:
>  - Add some "fall through" comments in switch statements based
>    upon feedback from Andrew Lunn
>  - Use RCU for generic xdp_prog, thanks to Johannes Berg.
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index cc07c3b..f8ff49c 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1892,6 +1892,7 @@ struct net_device {
>  	struct lock_class_key	*qdisc_tx_busylock;
>  	struct lock_class_key	*qdisc_running_key;
>  	bool			proto_down;
> +	struct bpf_prog __rcu	*xdp_prog;
>  };
>  #define to_net_dev(d) container_of(d, struct net_device, dev)
>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 7efb417..ad6de2d 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c

...
 
> +	if (static_key_false(&generic_xdp_needed)) {
> +		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
> +
> +		if (xdp_prog) {
> +			u32 act = netif_receive_generic_xdp(skb, xdp_prog);
> +
> +			if (act != XDP_PASS) {
> +				rcu_read_unlock();
> +				if (act == XDP_TX)
> +					dev_queue_xmit(skb);
> +				return NET_RX_DROP;
> +			}

Wouldn't it be cleaner if XDP returned an enumerated type and a
switch was used? Safer than allowing arbitrary int.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 18:39 ` Andy Gospodarek
@ 2017-04-10 19:28   ` David Miller
  2017-04-10 21:30     ` Andy Gospodarek
  2017-04-10 19:34   ` David Miller
  2017-04-10 20:12   ` Daniel Borkmann
  2 siblings, 1 reply; 19+ messages in thread
From: David Miller @ 2017-04-10 19:28 UTC (permalink / raw)
  To: andy; +Cc: netdev, xdp-newbies

From: Andy Gospodarek <andy@greyhouse.net>
Date: Mon, 10 Apr 2017 14:39:35 -0400

> As promised, I did some testing today with bnxt_en's implementation
> of XDP and this one.

Thanks a lot Andy, obviously the patch needs some more work.

I noticed GRO stuff in your profile, and Alexei mentioned this earlier
today.  We probably should elide GRO if generic XDP is attached, since
in particular this makes the skb_linearize() really expensive.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10  2:18 ` Alexei Starovoitov
  2017-04-10 16:57   ` Willem de Bruijn
@ 2017-04-10 19:33   ` David Miller
  2017-04-10 19:50   ` Daniel Borkmann
  2 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2017-04-10 19:33 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: netdev, xdp-newbies

From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Date: Sun, 9 Apr 2017 19:18:09 -0700

> On Sun, Apr 09, 2017 at 01:35:28PM -0700, David Miller wrote:
>> +	if (skb_linearize(skb))
>> +		goto do_drop;
> 
> do we need to force disable gro ?

I think we do.

> Otherwise if we linearize skb_is_gso packet it will be huge
> and not similar to normal xdp packets?

I completely agree.

> gso probably needs to disabled too to avoid veth surprises?

I'm not so sure about that.

> 
>> +
>> +	hlen = skb_headlen(skb);
>> +	xdp.data = skb->data;
> 
> it probably should be
> hlen = skb_headlen(skb) + skb->mac_len;
> xdp.data = skb->data - skb->mac_len;
> to make sure xdp program is looking at l2 header.

This is why Andy's tests aren't working properly, good find.

>> +	xdp.data_end = xdp.data + hlen;
>> +	xdp.data_hard_start = xdp.data - skb_headroom(skb);
>> +	orig_data = xdp.data;
>> +	act = bpf_prog_run_xdp(xdp_prog, &xdp);
>> +
>> +	off = xdp.data - orig_data;
>> +	if (off)
>> +		__skb_push(skb, off);
> 
> and restore l2 back somehow and get new skb->protocol ?
> if we simply do __skb_pull(skb, skb->mac_len); like
> we do with cls_bpf, it will not work correctly,

Yep.

I completely overlooked the fact that the MAC header was pulled
by the time we get here already.

> I suspect there always be drivers that don't support xdp (like e100),
> so this generic_xdp_install() will stay with us forever.
> Since it will stay, can we enable it for xdp enabled drivers too?

Yes it will be useful for debugging.

Andy already ran into this, he wanted to test it with a driver
that supports XDP so he had to comment out the driver's ndo op.

> Another advantage that it will help to flush out the differences
> between skb- and raw- modes in the drivers that support xdp already.

Indeed, this will be very valuable.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 18:39 ` Andy Gospodarek
  2017-04-10 19:28   ` David Miller
@ 2017-04-10 19:34   ` David Miller
  2017-04-10 21:33     ` Andy Gospodarek
  2017-04-10 20:12   ` Daniel Borkmann
  2 siblings, 1 reply; 19+ messages in thread
From: David Miller @ 2017-04-10 19:34 UTC (permalink / raw)
  To: andy; +Cc: netdev, xdp-newbies

From: Andy Gospodarek <andy@greyhouse.net>
Date: Mon, 10 Apr 2017 14:39:35 -0400

> I also noted that...
> 
> xdp1 not parsing the protocol correctly.   All traffic was showing up as
> protocol '0' instead of '17' in the output.  
> 
> xdp2 was not actually sending the frames back out when using this patch.
> 
> I'll take a look in a bit and see if I can track these two issues down.

Read Alexei's earlier posting, he spotted the bugs you are seeing
here.

Basically, the MAC header is pulled already so we have to push it back
before we run XDP over it.  We also have to be similarly careful with
the MAC header for XDP_TX cases.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10  2:18 ` Alexei Starovoitov
  2017-04-10 16:57   ` Willem de Bruijn
  2017-04-10 19:33   ` David Miller
@ 2017-04-10 19:50   ` Daniel Borkmann
  2 siblings, 0 replies; 19+ messages in thread
From: Daniel Borkmann @ 2017-04-10 19:50 UTC (permalink / raw)
  To: Alexei Starovoitov, David Miller; +Cc: netdev, xdp-newbies

On 04/10/2017 04:18 AM, Alexei Starovoitov wrote:
[...]
>> +	xdp.data_end = xdp.data + hlen;
>> +	xdp.data_hard_start = xdp.data - skb_headroom(skb);
>> +	orig_data = xdp.data;
>> +	act = bpf_prog_run_xdp(xdp_prog, &xdp);
>> +
>> +	off = xdp.data - orig_data;
>> +	if (off)
>> +		__skb_push(skb, off);
>
> and restore l2 back somehow and get new skb->protocol ?
> if we simply do __skb_pull(skb, skb->mac_len); like
> we do with cls_bpf, it will not work correctly,
> since if the program did ip->ipip encap (like our balancer
> does and the test tools/testing/selftests/bpf/test_xdp.c)
> the skb metadata fields will be wrong.
> So we need to repeat eth_type_trans() here if (xdp.data != orig_data)

Yeah, agree. Also, when we have gso skb and rewrite/resize parts
of the packet, we would need to update gso related shinfo meta
data accordingly (f.e. a rewrite from v4/v6, rewrite of whole pkt
as icmp reply, etc)?

Also, what about encap/decap, should inner skb headers get
updated as well along with skb->encapsulation, etc? How do we
handle checksumming on this layer?

> In case of cls_bpf when we mess with skb sizes we always
> adjust skb metafields in helpers, so there it's fine
> and __skb_pull(skb, skb->mac_len); is enough.
> Here we need to be a bit more careful.

In cls_bpf I was looking into something generic and fast for
encap/decap like bpf_xdp_adjust_head() but for skbs. Problem is
that they can be received from ingress/egress and transmitted
further from cls_bpf to ingress/egress, so keeping skb meta data
correct and up to date without exposing skb (implementation)
details like header pointers to users is crucial, as otherwise
these can get messed up potentially affecting the rest of the
system. We restricted helpers in cls_bpf to avoid that. Perhaps
we could make easier assumptions when this generic callback is
known to be called out of a physical driver's rx path, but when
being skb already (as mentioned below by Alexei's thoughts) ...

>>   static int netif_receive_skb_internal(struct sk_buff *skb)
>>   {
>>   	int ret;
>> @@ -4258,6 +4336,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
>>
>>   	rcu_read_lock();
>>
>> +	if (static_key_false(&generic_xdp_needed)) {
>> +		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
>> +
>> +		if (xdp_prog) {
>> +			u32 act = netif_receive_generic_xdp(skb, xdp_prog);
>
> That's indeed the best attachment point in the stack.
> I was trying to see whether it can be lowered into something like
> dev_gro_receive(), but not everyone calls it.
> Another option to put it into eth_type_trans() itself, then
> there are no problems with gro, l2 headers, and adjust_head,
> but changing all drivers is too much.
>
>> +
>> +			if (act != XDP_PASS) {
>> +				rcu_read_unlock();
>> +				if (act == XDP_TX)
>> +					dev_queue_xmit(skb);
>
> It should be fine. For cls_bpf we do recursion check __bpf_tx_skb()
> but I forgot specific details. May be here it's fine as-is.
> Daniel, do we need recursion check here?

Yeah, Willem is correct. That was for sch_handle_egress() to
sch_handle_egress() as that is otherwise not accounted by the
main xmit_recursion check we have in __dev_queue_xmit().

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 18:39 ` Andy Gospodarek
  2017-04-10 19:28   ` David Miller
  2017-04-10 19:34   ` David Miller
@ 2017-04-10 20:12   ` Daniel Borkmann
  2017-04-10 21:41     ` Andy Gospodarek
  2 siblings, 1 reply; 19+ messages in thread
From: Daniel Borkmann @ 2017-04-10 20:12 UTC (permalink / raw)
  To: Andy Gospodarek, David Miller; +Cc: netdev, xdp-newbies

On 04/10/2017 08:39 PM, Andy Gospodarek wrote:
[...]
> I ran this on a desktop-class system I have (i7-6700 CPU @ 3.40GHz)
> and used pktgen_sample03_burst_single_flow.sh from another system to
> throw ~6.5Mpps as a single UDP stream towards the system running XDP.
>
> I just commented out the ndo_xdp op in bnxt.c to test this patch.  The
> differences were pretty dramatic.
>
> Using the ndo_xdp ndo in bnxt_en, my perf report output looked like
> this (not sure why X is still running on this system!):
>
>      38.08%  swapper          [sysimgblt]                   [k] 0x0000000000005bd0
>      11.80%  swapper          [kernel.vmlinux]              [k] intel_idle
>      10.49%  swapper          [bnxt_en]                     [k] bnxt_rx_pkt
>       6.31%  swapper          [bnxt_en]                     [k] bnxt_rx_xdp
>       5.64%  swapper          [bnxt_en]                     [k] bnxt_poll
>       4.22%  swapper          [kernel.vmlinux]              [k] poll_idle
>       3.46%  swapper          [kernel.vmlinux]              [k] irq_entries_start
>       2.95%  swapper          [kernel.vmlinux]              [k] napi_complete_done
>       1.79%  swapper          [kernel.vmlinux]              [k] cpuidle_enter_state
>       1.53%  swapper          [kernel.vmlinux]              [k] menu_select
>       1.19%  swapper          [bnxt_en]                     [k] bnxt_reuse_rx_data
>       1.00%  swapper          [sysimgblt]                   [k] 0x0000000000005c6f
>       0.92%  swapper          [kernel.vmlinux]              [k] __next_timer_interrupt
>       0.71%  swapper          [kernel.vmlinux]              [k] _raw_spin_lock_irqsave
>       0.71%  swapper          [kernel.vmlinux]              [k] bpf_map_lookup_elem
>
> mpstat reports that the CPU receiving and dropping the traffic is
> basically running idle.  Dropping this amount of traffic in the driver
> has very little impact on the system.
>
> With v2 of this patch I see the following from perf report:
>
>      19.69%  ksoftirqd/3      [kernel.vmlinux]              [k] memcpy_erms
>      16.30%  ksoftirqd/3      [kernel.vmlinux]              [k] __bpf_prog_run

Forgot echo 1 > /proc/sys/net/core/bpf_jit_enable? Was it disabled in both cases?

>      10.11%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_rx_pkt
>       7.69%  ksoftirqd/3      [kernel.vmlinux]              [k] __build_skb
>       4.25%  ksoftirqd/3      [kernel.vmlinux]              [k] inet_gro_receive
>       3.74%  ksoftirqd/3      [kernel.vmlinux]              [k] kmem_cache_alloc
>       3.53%  ksoftirqd/3      [kernel.vmlinux]              [k] dev_gro_receive
>       3.43%  ksoftirqd/3      [kernel.vmlinux]              [k] page_frag_free
>       3.12%  ksoftirqd/3      [kernel.vmlinux]              [k] kmem_cache_free
>       2.56%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_poll
>       2.46%  ksoftirqd/3      [kernel.vmlinux]              [k] netif_receive_skb_internal
>       2.13%  ksoftirqd/3      [kernel.vmlinux]              [k] __udp4_lib_lookup
>       1.63%  ksoftirqd/3      [kernel.vmlinux]              [k] __napi_alloc_skb
>       1.51%  ksoftirqd/3      [kernel.vmlinux]              [k] eth_type_trans
>       1.42%  ksoftirqd/3      [kernel.vmlinux]              [k] udp_gro_receive
>       1.29%  ksoftirqd/3      [kernel.vmlinux]              [k] napi_gro_receive
>       1.25%  ksoftirqd/3      [kernel.vmlinux]              [k] udp4_gro_receive
>       1.18%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_rx_xdp
>       1.17%  ksoftirqd/3      [kernel.vmlinux]              [k] skb_release_data
>       1.11%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_reuse_rx_data
>       1.07%  ksoftirqd/3      [kernel.vmlinux]              [k] net_rx_action

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-09 20:35 [PATCH v2 net-next RFC] Generic XDP David Miller
                   ` (2 preceding siblings ...)
  2017-04-10 19:28 ` Stephen Hemminger
@ 2017-04-10 21:08 ` Daniel Borkmann
  2017-04-11 16:28 ` Eric Dumazet
  4 siblings, 0 replies; 19+ messages in thread
From: Daniel Borkmann @ 2017-04-10 21:08 UTC (permalink / raw)
  To: David Miller, netdev; +Cc: xdp-newbies

On 04/09/2017 10:35 PM, David Miller wrote:
[...]
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index cc07c3b..f8ff49c 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1892,6 +1892,7 @@ struct net_device {
>   	struct lock_class_key	*qdisc_tx_busylock;
>   	struct lock_class_key	*qdisc_running_key;
>   	bool			proto_down;
> +	struct bpf_prog __rcu	*xdp_prog;

Should this be moved rather near other rx members for locality?
My config has last member of rx group in a new cacheline, which
is really waste ...

[...]
         /* --- cacheline 14 boundary (896 bytes) --- */
         struct hlist_node          index_hlist;          /*   896    16 */

         /* XXX 48 bytes hole, try to pack */
[...]

... but given this layout can change significantly for a given
config, perhaps could be a union with something exotic that is
currently always built in like ax25_ptr; would then require to
depend on ax25 not being built, though. :/ Otoh, we potentially
have to linearize the skb in data path, which is slow anyway,
so probably okay as is, too.

[...]
> @@ -4247,6 +4248,83 @@ static int __netif_receive_skb(struct sk_buff *skb)
>   	return ret;
>   }
>
> +static struct static_key generic_xdp_needed __read_mostly;
> +
> +static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
> +{
> +	struct bpf_prog *new = xdp->prog;
> +	int ret = 0;
> +
> +	switch (xdp->command) {
> +	case XDP_SETUP_PROG: {
> +		struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
> +
> +		rcu_assign_pointer(dev->xdp_prog, new);
> +		if (old)
> +			bpf_prog_put(old);
> +
> +		if (old && !new)
> +			static_key_slow_dec(&generic_xdp_needed);
> +		else if (new && !old)
> +			static_key_slow_inc(&generic_xdp_needed);
> +		break;
> +	}
> +
> +	case XDP_QUERY_PROG:
> +		xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
> +		break;

Would be nice to extend this here, so that in the current 'ip link'
output we could indicate next to the 'xdp' flag that this does /not/
run natively in the driver.

Also, when the user loads a prog f.e. via 'ip link set dev em1 xdp obj prog.o',
we should add a flag where it could be specified that running non-natively
/is/ okay. Thus that the rtnl bits bail out if there's no ndo_xdp callback.
I could imagine that this could be overseen quickly and people wonder why
performance is significantly less than usually expected.

> +
> +	default:
> +		ret = -EINVAL;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +static u32 netif_receive_generic_xdp(struct sk_buff *skb,
> +				     struct bpf_prog *xdp_prog)
> +{
> +	struct xdp_buff xdp;
> +	u32 act = XDP_DROP;
> +	void *orig_data;
> +	int hlen, off;
> +
> +	if (skb_linearize(skb))
> +		goto do_drop;
> +
> +	hlen = skb_headlen(skb);
> +	xdp.data = skb->data;
> +	xdp.data_end = xdp.data + hlen;
> +	xdp.data_hard_start = xdp.data - skb_headroom(skb);
> +	orig_data = xdp.data;

Wondering regarding XDP_PACKET_HEADROOM requirement,
presumably we would need to guarantee enough headroom
here as well, so that there's no difference to a native
driver implementation.

> +	act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +
> +	off = xdp.data - orig_data;
> +	if (off)
> +		__skb_push(skb, off);
> +
> +	switch (act) {
> +	case XDP_PASS:
> +	case XDP_TX:
> +		break;
> +
> +	default:
> +		bpf_warn_invalid_xdp_action(act);
> +		/* fall through */
> +	case XDP_ABORTED:
> +		trace_xdp_exception(skb->dev, xdp_prog, act);
> +		/* fall through */
> +	case XDP_DROP:
> +	do_drop:
> +		kfree_skb(skb);
> +		break;
> +	}
> +
> +	return act;
> +}

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 19:28   ` David Miller
@ 2017-04-10 21:30     ` Andy Gospodarek
  2017-04-10 21:47       ` Michael Chan
  0 siblings, 1 reply; 19+ messages in thread
From: Andy Gospodarek @ 2017-04-10 21:30 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, xdp-newbies

On Mon, Apr 10, 2017 at 03:28:54PM -0400, David Miller wrote:
> From: Andy Gospodarek <andy@greyhouse.net>
> Date: Mon, 10 Apr 2017 14:39:35 -0400
> 
> > As promised, I did some testing today with bnxt_en's implementation
> > of XDP and this one.
> 
> Thanks a lot Andy, obviously the patch needs some more work.
> 
> I noticed GRO stuff in your profile, and Alexei mentioned this earlier
> today.  We probably should elide GRO if generic XDP is attached, since
> in particular this makes the skb_linearize() really expensive.

Good catch -- I actually thought we were disabling GRO automatically and it
looks like we are not.  :-/  I'll send Michael a patch.  

Disabling GRO allows me to process an additional 1Mpps, so I'm up to 7.5Mpps
with this patch.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 19:34   ` David Miller
@ 2017-04-10 21:33     ` Andy Gospodarek
  0 siblings, 0 replies; 19+ messages in thread
From: Andy Gospodarek @ 2017-04-10 21:33 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, xdp-newbies

On Mon, Apr 10, 2017 at 03:34:56PM -0400, David Miller wrote:
> From: Andy Gospodarek <andy@greyhouse.net>
> Date: Mon, 10 Apr 2017 14:39:35 -0400
> 
> > I also noted that...
> > 
> > xdp1 not parsing the protocol correctly.   All traffic was showing up as
> > protocol '0' instead of '17' in the output.  
> > 
> > xdp2 was not actually sending the frames back out when using this patch.
> > 
> > I'll take a look in a bit and see if I can track these two issues down.
> 
> Read Alexei's earlier posting, he spotted the bugs you are seeing
> here.
> 
> Basically, the MAC header is pulled already so we have to push it back
> before we run XDP over it.  We also have to be similarly careful with
> the MAC header for XDP_TX cases.

Yep, I saw those emails.  I just didn't want to be accused of not running all
the tests.  :-D 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 20:12   ` Daniel Borkmann
@ 2017-04-10 21:41     ` Andy Gospodarek
  2017-04-11 16:05       ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: Andy Gospodarek @ 2017-04-10 21:41 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: David Miller, netdev, xdp-newbies

On Mon, Apr 10, 2017 at 10:12:42PM +0200, Daniel Borkmann wrote:
> On 04/10/2017 08:39 PM, Andy Gospodarek wrote:
> [...]
> > I ran this on a desktop-class system I have (i7-6700 CPU @ 3.40GHz)
> > and used pktgen_sample03_burst_single_flow.sh from another system to
> > throw ~6.5Mpps as a single UDP stream towards the system running XDP.
> > 
> > I just commented out the ndo_xdp op in bnxt.c to test this patch.  The
> > differences were pretty dramatic.
> > 
> > Using the ndo_xdp ndo in bnxt_en, my perf report output looked like
> > this (not sure why X is still running on this system!):
> > 
> >      38.08%  swapper          [sysimgblt]                   [k] 0x0000000000005bd0
> >      11.80%  swapper          [kernel.vmlinux]              [k] intel_idle
> >      10.49%  swapper          [bnxt_en]                     [k] bnxt_rx_pkt
> >       6.31%  swapper          [bnxt_en]                     [k] bnxt_rx_xdp
> >       5.64%  swapper          [bnxt_en]                     [k] bnxt_poll
> >       4.22%  swapper          [kernel.vmlinux]              [k] poll_idle
> >       3.46%  swapper          [kernel.vmlinux]              [k] irq_entries_start
> >       2.95%  swapper          [kernel.vmlinux]              [k] napi_complete_done
> >       1.79%  swapper          [kernel.vmlinux]              [k] cpuidle_enter_state
> >       1.53%  swapper          [kernel.vmlinux]              [k] menu_select
> >       1.19%  swapper          [bnxt_en]                     [k] bnxt_reuse_rx_data
> >       1.00%  swapper          [sysimgblt]                   [k] 0x0000000000005c6f
> >       0.92%  swapper          [kernel.vmlinux]              [k] __next_timer_interrupt
> >       0.71%  swapper          [kernel.vmlinux]              [k] _raw_spin_lock_irqsave
> >       0.71%  swapper          [kernel.vmlinux]              [k] bpf_map_lookup_elem
> > 
> > mpstat reports that the CPU receiving and dropping the traffic is
> > basically running idle.  Dropping this amount of traffic in the driver
> > has very little impact on the system.
> > 
> > With v2 of this patch I see the following from perf report:
> > 
> >      19.69%  ksoftirqd/3      [kernel.vmlinux]              [k] memcpy_erms
> >      16.30%  ksoftirqd/3      [kernel.vmlinux]              [k] __bpf_prog_run
> 
> Forgot echo 1 > /proc/sys/net/core/bpf_jit_enable? Was it disabled in both cases?
> 

Good catch.  This must have been a run I did with it off.

Results with bpf_jit_enable=1 and gro off, still only 7.5Mpps.

    26.34%  ksoftirqd/5      [kernel.vmlinux]              [k] memcpy_erms
    14.79%  ksoftirqd/5      [bnxt_en]                     [k] bnxt_rx_pkt
    10.11%  ksoftirqd/5      [kernel.vmlinux]              [k] __build_skb
     5.01%  ksoftirqd/5      [kernel.vmlinux]              [k] page_frag_free
     4.66%  ksoftirqd/5      [kernel.vmlinux]              [k] kmem_cache_alloc
     4.19%  ksoftirqd/5      [kernel.vmlinux]              [k] kmem_cache_free
     3.67%  ksoftirqd/5      [bnxt_en]                     [k] bnxt_poll
     2.97%  ksoftirqd/5      [kernel.vmlinux]              [k] netif_receive_skb_internal
     2.24%  ksoftirqd/5      [kernel.vmlinux]              [k] __napi_alloc_skb
     1.92%  ksoftirqd/5      [kernel.vmlinux]              [k] eth_type_trans
     1.78%  ksoftirqd/5      [bnxt_en]                     [k] bnxt_rx_xdp
     1.62%  ksoftirqd/5      [kernel.vmlinux]              [k] net_rx_action
     1.61%  ksoftirqd/5      [kernel.vmlinux]              [k] kfree_skb
     1.58%  ksoftirqd/5      [kernel.vmlinux]              [k] dev_gro_receive
     1.51%  ksoftirqd/5      [bnxt_en]                     [k] bnxt_reuse_rx_data
     1.44%  ksoftirqd/5      [kernel.vmlinux]              [k] skb_release_data
     1.42%  ksoftirqd/5      [kernel.vmlinux]              [k] page_frag_alloc
     1.29%  ksoftirqd/5      [kernel.vmlinux]              [k] napi_gro_receive
     1.21%  ksoftirqd/5      [kernel.vmlinux]              [k] skb_release_head_state
     0.82%  ksoftirqd/5      [kernel.vmlinux]              [k] skb_free_head
     0.81%  ksoftirqd/5      [kernel.vmlinux]              [k] swiotlb_sync_single
     0.77%  ksoftirqd/5      [kernel.vmlinux]              [k] skb_gro_reset_offset
     0.76%  ksoftirqd/5      [kernel.vmlinux]              [k] skb_release_all
     0.73%  ksoftirqd/5      [kernel.vmlinux]              [k] kfree_skbmem




> >      10.11%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_rx_pkt
> >       7.69%  ksoftirqd/3      [kernel.vmlinux]              [k] __build_skb
> >       4.25%  ksoftirqd/3      [kernel.vmlinux]              [k] inet_gro_receive
> >       3.74%  ksoftirqd/3      [kernel.vmlinux]              [k] kmem_cache_alloc
> >       3.53%  ksoftirqd/3      [kernel.vmlinux]              [k] dev_gro_receive
> >       3.43%  ksoftirqd/3      [kernel.vmlinux]              [k] page_frag_free
> >       3.12%  ksoftirqd/3      [kernel.vmlinux]              [k] kmem_cache_free
> >       2.56%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_poll
> >       2.46%  ksoftirqd/3      [kernel.vmlinux]              [k] netif_receive_skb_internal
> >       2.13%  ksoftirqd/3      [kernel.vmlinux]              [k] __udp4_lib_lookup
> >       1.63%  ksoftirqd/3      [kernel.vmlinux]              [k] __napi_alloc_skb
> >       1.51%  ksoftirqd/3      [kernel.vmlinux]              [k] eth_type_trans
> >       1.42%  ksoftirqd/3      [kernel.vmlinux]              [k] udp_gro_receive
> >       1.29%  ksoftirqd/3      [kernel.vmlinux]              [k] napi_gro_receive
> >       1.25%  ksoftirqd/3      [kernel.vmlinux]              [k] udp4_gro_receive
> >       1.18%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_rx_xdp
> >       1.17%  ksoftirqd/3      [kernel.vmlinux]              [k] skb_release_data
> >       1.11%  ksoftirqd/3      [bnxt_en]                     [k] bnxt_reuse_rx_data
> >       1.07%  ksoftirqd/3      [kernel.vmlinux]              [k] net_rx_action
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 21:30     ` Andy Gospodarek
@ 2017-04-10 21:47       ` Michael Chan
  2017-04-11  0:56         ` David Miller
  0 siblings, 1 reply; 19+ messages in thread
From: Michael Chan @ 2017-04-10 21:47 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: David Miller, Netdev, xdp-newbies

On Mon, Apr 10, 2017 at 2:30 PM, Andy Gospodarek <andy@greyhouse.net> wrote:
> On Mon, Apr 10, 2017 at 03:28:54PM -0400, David Miller wrote:
>> From: Andy Gospodarek <andy@greyhouse.net>
>> Date: Mon, 10 Apr 2017 14:39:35 -0400
>>
>> > As promised, I did some testing today with bnxt_en's implementation
>> > of XDP and this one.
>>
>> Thanks a lot Andy, obviously the patch needs some more work.
>>
>> I noticed GRO stuff in your profile, and Alexei mentioned this earlier
>> today.  We probably should elide GRO if generic XDP is attached, since
>> in particular this makes the skb_linearize() really expensive.
>
> Good catch -- I actually thought we were disabling GRO automatically and it
> looks like we are not.  :-/  I'll send Michael a patch.

Andy,  I think we only need to disable GRO if we are doing generic
XDP.  Optimized XDP can still use GRO for the XDP_PASS case.

>
> Disabling GRO allows me to process an additional 1Mpps, so I'm up to 7.5Mpps
> with this patch.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 21:47       ` Michael Chan
@ 2017-04-11  0:56         ` David Miller
  0 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2017-04-11  0:56 UTC (permalink / raw)
  To: michael.chan; +Cc: andy, netdev, xdp-newbies

From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 10 Apr 2017 14:47:04 -0700

> On Mon, Apr 10, 2017 at 2:30 PM, Andy Gospodarek <andy@greyhouse.net> wrote:
>> On Mon, Apr 10, 2017 at 03:28:54PM -0400, David Miller wrote:
>>> From: Andy Gospodarek <andy@greyhouse.net>
>>> Date: Mon, 10 Apr 2017 14:39:35 -0400
>>>
>>> > As promised, I did some testing today with bnxt_en's implementation
>>> > of XDP and this one.
>>>
>>> Thanks a lot Andy, obviously the patch needs some more work.
>>>
>>> I noticed GRO stuff in your profile, and Alexei mentioned this earlier
>>> today.  We probably should elide GRO if generic XDP is attached, since
>>> in particular this makes the skb_linearize() really expensive.
>>
>> Good catch -- I actually thought we were disabling GRO automatically and it
>> looks like we are not.  :-/  I'll send Michael a patch.
> 
> Andy,  I think we only need to disable GRO if we are doing generic
> XDP.  Optimized XDP can still use GRO for the XDP_PASS case.

Right.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-10 21:41     ` Andy Gospodarek
@ 2017-04-11 16:05       ` Eric Dumazet
  2017-04-11 16:12         ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2017-04-11 16:05 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: Daniel Borkmann, David Miller, netdev, xdp-newbies

On Mon, 2017-04-10 at 17:41 -0400, Andy Gospodarek wrote:

> Results with bpf_jit_enable=1 and gro off, still only 7.5Mpps.

7.5 Mpps is already a nice number.

We all understand that using native XDP in the driver might give some
extra pps, but for the ones that do not want to patch old drivers, it is
probably good enough.

> 
>     26.34%  ksoftirqd/5      [kernel.vmlinux]              [k] memcpy_erms

Can you investigate to see what are the call graphs ?

Is this from copying headers to skb->head from bnxt driver ?

Some kind of copybreak maybe ?

perf record -a -g sleep 5
perf report --stdio

Copybreak is generally not really useful, and can have downsides.

Much better to let upper stacks deciding this.

For example, there is no point doing copy break for TCP ACK packets that
are going to be consumed immediately.

There is also no point doing copy break in case the packet will be
dropped (say by ... XDP ;) )


>     14.79%  ksoftirqd/5      [bnxt_en]                     [k] bnxt_rx_pkt
>     10.11%  ksoftirqd/5      [kernel.vmlinux]              [k] __build_skb
>      5.01%  ksoftirqd/5      [kernel.vmlinux]              [k] page_frag_free
>      4.66%  ksoftirqd/5      [kernel.vmlinux]              [k] kmem_cache_alloc
>      4.19%  ksoftirqd/5      [kernel.vmlinux]              [k] kmem_cache_free
>      3.67%  ksoftirqd/5      [bnxt_en]                     [k] bnxt_poll
>      2.97%  ksoftirqd/5      [kernel.vmlinux]              [k] netif_receive_skb_internal
>      2.24%  ksoftirqd/5      [kernel.vmlinux]              [k] __napi_alloc_skb
>      1.92%  ksoftirqd/5      [kernel.vmlinux]              [k] eth_type_trans
>      1.78%  ksoftirqd/5      [bnxt_en]                     [k] bnxt_rx_xdp
>      1.62%  ksoftirqd/5      [kernel.vmlinux]              [k] net_rx_action

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-11 16:05       ` Eric Dumazet
@ 2017-04-11 16:12         ` Eric Dumazet
  0 siblings, 0 replies; 19+ messages in thread
From: Eric Dumazet @ 2017-04-11 16:12 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: Daniel Borkmann, David Miller, netdev, xdp-newbies

On Tue, 2017-04-11 at 09:05 -0700, Eric Dumazet wrote:

> Some kind of copybreak maybe ?
> 
> perf record -a -g sleep 5
> perf report --stdio
> 
> Copybreak is generally not really useful, and can have downsides.
> 
> Much better to let upper stacks deciding this.
> 
> For example, there is no point doing copy break for TCP ACK packets that
> are going to be consumed immediately.
> 
> There is also no point doing copy break in case the packet will be
> dropped (say by ... XDP ;) )


Yes :

#define BNXT_RX_COPY_THRESH 256

For optimal results, you probably want to remove copybreak.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 net-next RFC] Generic XDP
  2017-04-09 20:35 [PATCH v2 net-next RFC] Generic XDP David Miller
                   ` (3 preceding siblings ...)
  2017-04-10 21:08 ` Daniel Borkmann
@ 2017-04-11 16:28 ` Eric Dumazet
  4 siblings, 0 replies; 19+ messages in thread
From: Eric Dumazet @ 2017-04-11 16:28 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, xdp-newbies

On Sun, 2017-04-09 at 13:35 -0700, David Miller wrote:
> This provides a generic non-optimized XDP implementation when the
> device driver does not provide an optimized one.
> 
> It is arguable that perhaps I should have required something like
> this as part of the initial XDP feature merge.
> 
> I believe this is critical for two reasons:
> 
> 1) Accessibility.  More people can play with XDP with less
>    dependencies.  Yes I know we have XDP support in virtio_net, but
>    that just creates another dependency for learning how to use this
>    facility.
> 
>    I wrote this to make life easier for the XDP newbies.
> 
> 2) As a model for what the expected semantics are.  If there is a pure
>    generic core implementation, it serves as a semantic example for
>    driver folks adding XDP support.
> 

Nice !

It seems that this implementation is not vlan aware ?

It seems that some native XDP implementations are not vlan aware as
well.

Probably need to disable VLAN acceleration when/if XDP is used.

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2017-04-11 16:28 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-09 20:35 [PATCH v2 net-next RFC] Generic XDP David Miller
2017-04-10  2:18 ` Alexei Starovoitov
2017-04-10 16:57   ` Willem de Bruijn
2017-04-10 19:33   ` David Miller
2017-04-10 19:50   ` Daniel Borkmann
2017-04-10 18:39 ` Andy Gospodarek
2017-04-10 19:28   ` David Miller
2017-04-10 21:30     ` Andy Gospodarek
2017-04-10 21:47       ` Michael Chan
2017-04-11  0:56         ` David Miller
2017-04-10 19:34   ` David Miller
2017-04-10 21:33     ` Andy Gospodarek
2017-04-10 20:12   ` Daniel Borkmann
2017-04-10 21:41     ` Andy Gospodarek
2017-04-11 16:05       ` Eric Dumazet
2017-04-11 16:12         ` Eric Dumazet
2017-04-10 19:28 ` Stephen Hemminger
2017-04-10 21:08 ` Daniel Borkmann
2017-04-11 16:28 ` Eric Dumazet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.