bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jesper Dangaard Brouer <brouer@redhat.com>
To: David Ahern <dsahern@gmail.com>,
	netdev@vger.kernel.org, Jakub Kicinski <kuba@kernel.org>,
	"David S. Miller" <davem@davemloft.net>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>,
	bpf@vger.kernel.org, Eric Dumazet <eric.dumazet@gmail.com>,
	Daniel Borkmann <borkmann@iogearbox.net>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>
Subject: [PATCH net-next V1] net: adjust net_device layout for cacheline usage
Date: Tue, 26 Jan 2021 18:39:39 +0100	[thread overview]
Message-ID: <161168277983.410784.12401225493601624417.stgit@firesoul> (raw)

The current layout of net_device is not optimal for cacheline usage.

The adj_list.lower linked list member is split between cachelines 2 and 3.
The ifindex is placed together with stats (struct net_device_stats),
although most modern drivers don't update this stats member.

The members netdev_ops, mtu and hard_header_len are placed on three
different cachelines. These members are accessed for XDP redirect into
devmap, which was noticeable with the perf tool. When not using the map
redirect variant (like TC-BPF does), then ifindex is also used, which is
placed on a separate fourth cacheline. These members are also accessed
during forwarding with the regular network stack. The members priv_flags and
flags are on the fast-path for the network stack transmit path in
__dev_queue_xmit (currently located on the same cacheline as mtu).

This patch creates a read mostly cacheline, with the purpose of keeping the
above mentioned members on the same cacheline.

Some netdev_features_t members also become part of this cacheline, which is
on purpose, as the function netif_skb_features() is on the fast-path via
validate_xmit_skb().

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/linux/netdevice.h |   53 +++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b7915484369c..2645f114de54 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1855,7 +1855,6 @@ struct net_device {
 	unsigned long		mem_end;
 	unsigned long		mem_start;
 	unsigned long		base_addr;
-	int			irq;
 
 	/*
 	 *	Some hardware also needs these fields (state,dev_list,
@@ -1877,6 +1876,23 @@ struct net_device {
 		struct list_head lower;
 	} adj_list;
 
+	/* Read-mostly cache-line for fast-path access */
+	unsigned int		flags;
+	unsigned int		priv_flags;
+	const struct net_device_ops *netdev_ops;
+	int			ifindex;
+	unsigned short		gflags;
+	unsigned short		hard_header_len;
+
+	/* Note : dev->mtu is often read without holding a lock.
+	 * Writers usually hold RTNL.
+	 * It is recommended to use READ_ONCE() to annotate the reads,
+	 * and to use WRITE_ONCE() to annotate the writes.
+	 */
+	unsigned int		mtu;
+	unsigned short		needed_headroom;
+	unsigned short		needed_tailroom;
+
 	netdev_features_t	features;
 	netdev_features_t	hw_features;
 	netdev_features_t	wanted_features;
@@ -1885,10 +1901,15 @@ struct net_device {
 	netdev_features_t	mpls_features;
 	netdev_features_t	gso_partial_features;
 
-	int			ifindex;
+	unsigned int		min_mtu;
+	unsigned int		max_mtu;
+	unsigned short		type;
+	unsigned char		min_header_len;
+	unsigned char		name_assign_type;
+
 	int			group;
 
-	struct net_device_stats	stats;
+	struct net_device_stats	stats; /* not used by modern drivers */
 
 	atomic_long_t		rx_dropped;
 	atomic_long_t		tx_dropped;
@@ -1902,7 +1923,6 @@ struct net_device {
 	const struct iw_handler_def *wireless_handlers;
 	struct iw_public_data	*wireless_data;
 #endif
-	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	const struct l3mdev_ops	*l3mdev_ops;
@@ -1921,34 +1941,12 @@ struct net_device {
 
 	const struct header_ops *header_ops;
 
-	unsigned int		flags;
-	unsigned int		priv_flags;
-
-	unsigned short		gflags;
-	unsigned short		padded;
-
 	unsigned char		operstate;
 	unsigned char		link_mode;
 
 	unsigned char		if_port;
 	unsigned char		dma;
 
-	/* Note : dev->mtu is often read without holding a lock.
-	 * Writers usually hold RTNL.
-	 * It is recommended to use READ_ONCE() to annotate the reads,
-	 * and to use WRITE_ONCE() to annotate the writes.
-	 */
-	unsigned int		mtu;
-	unsigned int		min_mtu;
-	unsigned int		max_mtu;
-	unsigned short		type;
-	unsigned short		hard_header_len;
-	unsigned char		min_header_len;
-	unsigned char		name_assign_type;
-
-	unsigned short		needed_headroom;
-	unsigned short		needed_tailroom;
-
 	/* Interface address info. */
 	unsigned char		perm_addr[MAX_ADDR_LEN];
 	unsigned char		addr_assign_type;
@@ -1959,7 +1957,10 @@ struct net_device {
 	unsigned short		neigh_priv_len;
 	unsigned short          dev_id;
 	unsigned short          dev_port;
+	unsigned short		padded;
+
 	spinlock_t		addr_list_lock;
+	int			irq;
 
 	struct netdev_hw_addr_list	uc;
 	struct netdev_hw_addr_list	mc;



             reply	other threads:[~2021-01-27 12:49 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-26 17:39 Jesper Dangaard Brouer [this message]
2021-01-29  3:51 ` [PATCH net-next V1] net: adjust net_device layout for cacheline usage David Ahern
2021-01-29  7:58   ` Jesper Dangaard Brouer
     [not found]     ` <20210129114642.139cb7dc@carbon>
2021-01-29 19:35       ` Jakub Kicinski
2021-01-29 19:47         ` Eric Dumazet
2021-01-29 20:07           ` Jakub Kicinski
2021-02-12 15:57             ` Jesper Dangaard Brouer
2021-01-29  4:50 ` patchwork-bot+netdevbpf
     [not found] ` <52835f1f-96e1-b36e-2631-1182649ac3a8@gmail.com>
     [not found]   ` <20210129150058.34e3a855@carbon>
2021-02-12 15:49     ` Jesper Dangaard Brouer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=161168277983.410784.12401225493601624417.stgit@firesoul \
    --to=brouer@redhat.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=borkmann@iogearbox.net \
    --cc=bpf@vger.kernel.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@gmail.com \
    --cc=eric.dumazet@gmail.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).