All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Miller <davem@davemloft.net>
To: netdev@vger.kernel.org
Cc: xdp-newbies@vger.kernel.org
Subject: [PATCH v3 net-next RFC] Generic XDP
Date: Wed, 12 Apr 2017 14:54:15 -0400 (EDT)	[thread overview]
Message-ID: <20170412.145415.1441440342830198148.davem@davemloft.net> (raw)


This provides a generic SKB based non-optimized XDP path which is used
if either the driver lacks a specific XDP implementation, or the user
requests it via a new IFLA_XDP_FLAGS value named XDP_FLAGS_SKB_MODE.

It is arguable that perhaps I should have required something like
this as part of the initial XDP feature merge.

I believe this is critical for two reasons:

1) Accessibility.  More people can play with XDP with less
   dependencies.  Yes I know we have XDP support in virtio_net, but
   that just creates another depedency for learning how to use this
   facility.

   I wrote this to make life easier for the XDP newbies.

2) As a model for what the expected semantics are.  If there is a pure
   generic core implementation, it serves as a semantic example for
   driver folks adding XDP support.

This is just a rough draft and is untested.

One thing I have not tried to address here is the issue of
XDP_PACKET_HEADROOM, thanks to Daniel for spotting that.  It seems
incredibly expensive to do a skb_cow(skb, XDP_PACKET_HEADROOM) or
whatever even if the XDP program doesn't try to push headers at all.
I think we really need the verifier to somehow propagate whether
certain XDP helpers are used or not.

Signed-off-by: David S. Miller <davem@davemloft.net>
--

v3:
 - Make sure XDP program sees packet at MAC header, push back MAC
   header if we do XDP_TX.  (Alexei)
 - Elide GRO when generic XDP is in use.  (Alexei)
 - Add XDP_FLAG_SKB_MODE flag which the user can use to request generic
   XDP even if the driver has an XDP implementation.  (Alexei)
 - Report whether SKB mode is in use in rtnl_xdp_fill() via XDP_FLAGS
   attribute.  (Daniel)

v2:
 - Add some "fall through" comments in switch statements based
   upon feedback from Andrew Lunn
 - Use RCU for generic xdp_prog, thanks to Johannes Berg.

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b0aa089..071a58b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1891,9 +1891,17 @@ struct net_device {
 	struct lock_class_key	*qdisc_tx_busylock;
 	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
+	struct bpf_prog __rcu	*xdp_prog;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
+static inline bool netif_elide_gro(const struct net_device *dev)
+{
+	if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
+		return true;
+	return false;
+}
+
 #define	NETDEV_ALIGN		32
 
 static inline
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8b405af..633aa02 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -887,7 +887,9 @@ enum {
 /* XDP section */
 
 #define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
-#define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST)
+#define XDP_FLAGS_SKB_MODE		(2U << 0)
+#define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
+					 XDP_FLAGS_SKB_MODE)
 
 enum {
 	IFLA_XDP_UNSPEC,
diff --git a/net/core/dev.c b/net/core/dev.c
index ef9fe60e..9ed4569 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -95,6 +95,7 @@
 #include <linux/notifier.h>
 #include <linux/skbuff.h>
 #include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/busy_poll.h>
@@ -4247,6 +4248,88 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	return ret;
 }
 
+static struct static_key generic_xdp_needed __read_mostly;
+
+static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	struct bpf_prog *new = xdp->prog;
+	int ret = 0;
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG: {
+		struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
+
+		rcu_assign_pointer(dev->xdp_prog, new);
+		if (old)
+			bpf_prog_put(old);
+
+		if (old && !new)
+			static_key_slow_dec(&generic_xdp_needed);
+		else if (new && !old)
+			static_key_slow_inc(&generic_xdp_needed);
+		break;
+	}
+
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct bpf_prog *xdp_prog)
+{
+	struct xdp_buff xdp;
+	u32 act = XDP_DROP;
+	void *orig_data;
+	int hlen, off;
+
+	if (skb_linearize(skb))
+		goto do_drop;
+
+	/* The XDP program wants to see the packet starting at the MAC
+	 * header.
+	 */
+	hlen = skb_headlen(skb) + skb->mac_len;
+	xdp.data = skb->data - skb->mac_len;
+	xdp.data_end = xdp.data + hlen;
+	xdp.data_hard_start = xdp.data - skb_headroom(skb);
+	orig_data = xdp.data;
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+	off = xdp.data - orig_data;
+	if (off)
+		__skb_push(skb, off);
+
+	switch (act) {
+	case XDP_TX:
+		__skb_push(skb, skb->mac_len);
+		/* fall through */
+	case XDP_PASS:
+		break;
+
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(skb->dev, xdp_prog, act);
+		/* fall through */
+	case XDP_DROP:
+	do_drop:
+		kfree_skb(skb);
+		break;
+	}
+
+	return act;
+}
+
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	int ret;
@@ -4258,6 +4341,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 
 	rcu_read_lock();
 
+	if (static_key_false(&generic_xdp_needed)) {
+		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+
+		if (xdp_prog) {
+			u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+
+			if (act != XDP_PASS) {
+				rcu_read_unlock();
+				if (act == XDP_TX)
+					dev_queue_xmit(skb);
+				return NET_RX_DROP;
+			}
+		}
+	}
+
 #ifdef CONFIG_RPS
 	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4490,7 +4588,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	enum gro_result ret;
 	int grow;
 
-	if (!(skb->dev->features & NETIF_F_GRO))
+	if (netif_elide_gro(skb->dev))
 		goto normal;
 
 	if (skb->csum_bad)
@@ -6718,6 +6816,7 @@ EXPORT_SYMBOL(dev_change_proto_down);
  */
 int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 {
+	int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
 	const struct net_device_ops *ops = dev->netdev_ops;
 	struct bpf_prog *prog = NULL;
 	struct netdev_xdp xdp;
@@ -6725,14 +6824,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 
 	ASSERT_RTNL();
 
-	if (!ops->ndo_xdp)
-		return -EOPNOTSUPP;
+	xdp_op = ops->ndo_xdp;
+	if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
+		xdp_op = generic_xdp_install;
+
 	if (fd >= 0) {
 		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
 			memset(&xdp, 0, sizeof(xdp));
 			xdp.command = XDP_QUERY_PROG;
 
-			err = ops->ndo_xdp(dev, &xdp);
+			err = xdp_op(dev, &xdp);
 			if (err < 0)
 				return err;
 			if (xdp.prog_attached)
@@ -6748,7 +6849,7 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 	xdp.command = XDP_SETUP_PROG;
 	xdp.prog = prog;
 
-	err = ops->ndo_xdp(dev, &xdp);
+	err = xdp_op(dev, &xdp);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
 
@@ -7789,6 +7890,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
 void free_netdev(struct net_device *dev)
 {
 	struct napi_struct *p, *n;
+	struct bpf_prog *prog;
 
 	might_sleep();
 	netif_free_tx_queues(dev);
@@ -7807,6 +7909,12 @@ void free_netdev(struct net_device *dev)
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
 
+	prog = rcu_dereference(dev->xdp_prog);
+	if (prog) {
+		bpf_prog_put(prog);
+		static_key_slow_dec(&generic_xdp_needed);
+	}
+
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		netdev_freemem(dev);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index c98bbfb..814e58a 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,7 +13,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 	struct gro_cell *cell;
 
-	if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO))
+	if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
 		return netif_rx(skb);
 
 	cell = this_cpu_ptr(gcells->cells);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 58419da..958a2bf 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -896,15 +896,13 @@ static size_t rtnl_port_size(const struct net_device *dev,
 		return port_self_size;
 }
 
-static size_t rtnl_xdp_size(const struct net_device *dev)
+static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
-			  nla_total_size(1);	/* XDP_ATTACHED */
+			  nla_total_size(1) +	/* XDP_ATTACHED */
+			  nla_total_size(4);	/* XDP_FLAGS */
 
-	if (!dev->netdev_ops->ndo_xdp)
-		return 0;
-	else
-		return xdp_size;
+	return xdp_size;
 }
 
 static noinline size_t if_nlmsg_size(const struct net_device *dev,
@@ -943,7 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
 	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
-	       + rtnl_xdp_size(dev) /* IFLA_XDP */
+	       + rtnl_xdp_size() /* IFLA_XDP */
 	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
 
 }
@@ -1251,23 +1249,35 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
-	struct netdev_xdp xdp_op = {};
 	struct nlattr *xdp;
+	u32 xdp_flags = 0;
+	u8 val = 0;
 	int err;
 
-	if (!dev->netdev_ops->ndo_xdp)
-		return 0;
 	xdp = nla_nest_start(skb, IFLA_XDP);
 	if (!xdp)
 		return -EMSGSIZE;
-	xdp_op.command = XDP_QUERY_PROG;
-	err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
-	if (err)
-		goto err_cancel;
-	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
+	if (rcu_access_pointer(dev->xdp_prog)) {
+		xdp_flags = XDP_FLAGS_SKB_MODE;
+		val = 1;
+	} else if (dev->netdev_ops->ndo_xdp) {
+		struct netdev_xdp xdp_op = {};
+
+		xdp_op.command = XDP_QUERY_PROG;
+		err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
+		if (err)
+			goto err_cancel;
+		val = xdp_op.prog_attached;
+	}
+	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
 	if (err)
 		goto err_cancel;
 
+	if (xdp_flags) {
+		err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
+		if (err)
+			goto err_cancel;
+	}
 	nla_nest_end(skb, xdp);
 	return 0;
 

             reply	other threads:[~2017-04-12 18:54 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-12 18:54 David Miller [this message]
2017-04-12 19:54 ` [PATCH v3 net-next RFC] Generic XDP David Ahern
2017-04-13  2:08   ` David Miller
2017-04-13  2:16     ` David Ahern
2017-04-12 21:30 ` Stephen Hemminger
2017-04-12 21:49   ` Eric Dumazet
2017-04-13  1:55     ` David Miller
2017-04-13  1:54   ` David Miller
2017-04-13  4:20 ` Alexei Starovoitov
2017-04-13  6:10   ` Johannes Berg
2017-04-13 15:38     ` David Miller
2017-04-14 19:41       ` Alexei Starovoitov
2017-04-18  9:47         ` Johannes Berg
2017-04-18 23:09           ` Alexei Starovoitov
2017-04-13 15:37   ` David Miller
2017-04-13 19:22     ` Johannes Berg
2017-04-13 20:01       ` David Miller
2017-04-14  8:07         ` Johannes Berg
2017-04-14 19:09           ` Alexei Starovoitov
2017-04-14  9:05     ` Jesper Dangaard Brouer
2017-04-14 19:28       ` Alexei Starovoitov
2017-04-14 22:18         ` Daniel Borkmann
2017-04-14 22:30         ` Jakub Kicinski
2017-04-15  0:46           ` Alexei Starovoitov
2017-04-15  1:47             ` Jakub Kicinski
2017-04-16 20:26             ` Jesper Dangaard Brouer
2017-04-17 19:49               ` David Miller
2017-04-17 23:04                 ` Alexei Starovoitov
2017-04-17 23:33                   ` Daniel Borkmann
2017-04-18 18:46                   ` David Miller
2017-04-18 23:05                     ` Alexei Starovoitov
2017-04-13  6:48 ` Michael Chan
2017-04-13 15:38   ` David Miller
2017-04-13 15:57 ` Daniel Borkmann
2017-04-13 16:04   ` David Miller
2017-04-13 17:13 ` aa5c2fd79f: net/core/dev.c:#suspicious_rcu_dereference_check()usage kernel test robot
2017-04-13 17:13   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170412.145415.1441440342830198148.davem@davemloft.net \
    --to=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=xdp-newbies@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.