netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4] netdev attribute to control xdpgeneric skb linearization
@ 2020-02-28 10:54 Luigi Rizzo
  2020-02-28 11:20 ` Toke Høiland-Jørgensen
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Luigi Rizzo @ 2020-02-28 10:54 UTC (permalink / raw)
  To: netdev, toke, davem, hawk, sameehj; +Cc: linux-kernel, Luigi Rizzo

Add a netdevice flag to control skb linearization in generic xdp mode.

The attribute can be modified through
	/sys/class/net/<DEVICE>/xdpgeneric_linearize
The default is 1 (on).

Motivation: xdp expects linear skbs with some minimum headroom, and
generic xdp calls skb_linearize() if needed. The linearization is
expensive, and may be unnecessary e.g. when the xdp program does
not need access to the whole payload.
This sysfs entry allows users to opt out of linearization on a
per-device basis (linearization is still performed on cloned skbs).

On a kernel instrumented to grab timestamps around the linearization
code in netif_receive_generic_xdp(), under heavy netperf traffic with a
1500-byte MTU, I see the following times (nanoseconds/pkt), tabled below.

The receiver generally sees larger packets, so the difference is more
significant on the receive side.

ns/pkt                   RECEIVER                 SENDER

                    p50     p90     p99       p50   p90    p99

LINEARIZATION:    600ns  1090ns  4900ns     149ns 249ns  460ns
NO LINEARIZATION:  40ns    59ns    90ns      40ns  50ns  100ns

v1 --> v2 : added Documentation
v2 --> v3 : adjusted for skb_cloned
v3 --> v4 : renamed to xdpgeneric_linearize, documentation

Signed-off-by: Luigi Rizzo <lrizzo@google.com>
---
 Documentation/ABI/testing/sysfs-class-net | 10 ++++++++++
 include/linux/netdevice.h                 |  3 ++-
 net/core/dev.c                            |  8 ++++++--
 net/core/net-sysfs.c                      | 16 ++++++++++++++++
 4 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 664a8f6a634f..d5531bf223d7 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -301,3 +301,13 @@ Contact:	netdev@vger.kernel.org
 Description:
 		32-bit unsigned integer counting the number of times the link has
 		been down
+
+What:		/sys/class/net/<iface>/xdpgeneric_linearize
+Date:		Feb 2020
+KernelVersion:	5.6
+Contact:	netdev@vger.kernel.org
+Description:
+		boolean controlling whether skbs should be linearized in
+		generic XDP. Defaults to true. Turning this off can increase
+		the performance of generic XDP at the cost of making the XDP
+		program unable to access packet fragments after the first one.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6c3f7032e8d9..f06294b2e8bb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1985,7 +1985,8 @@ struct net_device {
 
 	struct netdev_rx_queue	*_rx;
 	unsigned int		num_rx_queues;
-	unsigned int		real_num_rx_queues;
+	unsigned int		real_num_rx_queues:31;
+	unsigned int		xdpgeneric_linearize : 1;
 
 	struct bpf_prog __rcu	*xdp_prog;
 	unsigned long		gro_flush_timeout;
diff --git a/net/core/dev.c b/net/core/dev.c
index dbbfff123196..c539489d3166 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4520,9 +4520,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	/* XDP packets must be linear and must have sufficient headroom
 	 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
 	 * native XDP provides, thus we need to do it here as well.
+	 * For non-cloned skbs, xdpgeneric_linearize controls linearization.
 	 */
-	if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
-	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+	if (skb_cloned(skb) ||
+	    (skb->dev->xdpgeneric_linearize &&
+	     (skb_is_nonlinear(skb) ||
+	      skb_headroom(skb) < XDP_PACKET_HEADROOM))) {
 		int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
 		int troom = skb->tail + skb->data_len - skb->end;
 
@@ -9806,6 +9809,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->gso_max_segs = GSO_MAX_SEGS;
 	dev->upper_level = 1;
 	dev->lower_level = 1;
+	dev->xdpgeneric_linearize = 1;
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index cf0215734ceb..eab06a427d90 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -442,6 +442,21 @@ static ssize_t proto_down_store(struct device *dev,
 }
 NETDEVICE_SHOW_RW(proto_down, fmt_dec);
 
+static int change_xdpgeneric_linearize(struct net_device *dev,
+				       unsigned long val)
+{
+	dev->xdpgeneric_linearize = !!val;
+	return 0;
+}
+
+static ssize_t xdpgeneric_linearize_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_xdpgeneric_linearize);
+}
+NETDEVICE_SHOW_RW(xdpgeneric_linearize, fmt_dec);
+
 static ssize_t phys_port_id_show(struct device *dev,
 				 struct device_attribute *attr, char *buf)
 {
@@ -536,6 +551,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
 	&dev_attr_phys_port_name.attr,
 	&dev_attr_phys_switch_id.attr,
 	&dev_attr_proto_down.attr,
+	&dev_attr_xdpgeneric_linearize.attr,
 	&dev_attr_carrier_up_count.attr,
 	&dev_attr_carrier_down_count.attr,
 	NULL,
-- 
2.25.1.481.gfbce0eb801-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-03-04 10:06 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-28 10:54 [PATCH v4] netdev attribute to control xdpgeneric skb linearization Luigi Rizzo
2020-02-28 11:20 ` Toke Høiland-Jørgensen
2020-02-28 12:12 ` Michal Kubecek
2020-02-28 12:29 ` Jesper Dangaard Brouer
2020-02-28 13:19   ` Luigi Rizzo
2020-02-28 12:30 ` Jesper Dangaard Brouer
2020-02-28 19:00 ` Jakub Kicinski
2020-02-28 23:53   ` Willem de Bruijn
2020-03-03 19:46     ` Daniel Borkmann
2020-03-03 20:50       ` Jakub Kicinski
2020-03-03 21:04         ` Daniel Borkmann
2020-03-03 21:10         ` Willem de Bruijn
2020-03-04  9:18           ` Jesper Dangaard Brouer
2020-03-04 10:06       ` Luigi Rizzo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).