bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: bpf@vger.kernel.org, netdev@vger.kernel.org
Cc: "Björn Töpel" <bjorn@kernel.org>,
	"David S. Miller" <davem@davemloft.net>,
	"Alexei Starovoitov" <ast@kernel.org>,
	"Andrii Nakryiko" <andrii@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Hao Luo" <haoluo@google.com>, "Jakub Kicinski" <kuba@kernel.org>,
	"Jesper Dangaard Brouer" <hawk@kernel.org>,
	"Jiri Olsa" <jolsa@kernel.org>,
	"John Fastabend" <john.fastabend@gmail.com>,
	"Jonathan Lemon" <jonathan.lemon@gmail.com>,
	"KP Singh" <kpsingh@kernel.org>,
	"Maciej Fijalkowski" <maciej.fijalkowski@intel.com>,
	"Magnus Karlsson" <magnus.karlsson@intel.com>,
	"Martin KaFai Lau" <martin.lau@linux.dev>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Song Liu" <song@kernel.org>,
	"Stanislav Fomichev" <sdf@google.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Yonghong Song" <yonghong.song@linux.dev>,
	"Sebastian Andrzej Siewior" <bigeasy@linutronix.de>
Subject: [PATCH RFC net-next 2/2] net: Move per-CPU flush-lists to bpf_xdp_storage on PREEMPT_RT.
Date: Tue, 13 Feb 2024 15:58:53 +0100	[thread overview]
Message-ID: <20240213145923.2552753-3-bigeasy@linutronix.de> (raw)
In-Reply-To: <20240213145923.2552753-1-bigeasy@linutronix.de>

The per-CPU flush lists are accessed from within the NAPI callback
(xdp_do_flush() for instance). They are subject to the same problem as struct
bpf_redirect_info.

Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and
xskmap_map_flush_list to struct bpf_xdp_storage. Add wrappers for the
access. Use it only on PREEMPT_RT, keep the per-CPU lists on
non-PREEMPT_RT builds.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/filter.h | 34 ++++++++++++++++++++++++++++++++++
 kernel/bpf/cpumap.c    | 33 ++++++++++++++++++++++++++-------
 kernel/bpf/devmap.c    | 33 ++++++++++++++++++++++++++-------
 net/xdp/xsk.c          | 33 +++++++++++++++++++++++++++------
 4 files changed, 113 insertions(+), 20 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 97c9be9cabfd6..231ecdc431a00 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -725,6 +725,9 @@ static inline struct bpf_redirect_info *xdp_storage_get_ri(void)
 
 struct bpf_xdp_storage {
 	struct bpf_redirect_info ri;
+	struct list_head cpu_map_flush_list;
+	struct list_head dev_map_flush_list;
+	struct list_head xskmap_map_flush_list;
 };
 
 static inline struct bpf_xdp_storage *xdp_storage_set(struct bpf_xdp_storage *xdp_store)
@@ -734,6 +737,9 @@ static inline struct bpf_xdp_storage *xdp_storage_set(struct bpf_xdp_storage *xd
 	tsk = current;
 	if (tsk->bpf_xdp_storage != NULL)
 		return NULL;
+	INIT_LIST_HEAD(&xdp_store->cpu_map_flush_list);
+	INIT_LIST_HEAD(&xdp_store->dev_map_flush_list);
+	INIT_LIST_HEAD(&xdp_store->xskmap_map_flush_list);
 	tsk->bpf_xdp_storage = xdp_store;
 	return xdp_store;
 }
@@ -764,6 +770,34 @@ static inline struct bpf_redirect_info *xdp_storage_get_ri(void)
 		return NULL;
 	return &xdp_store->ri;
 }
+
+static inline struct list_head *xdp_storage_get_cpu_map_flush_list(void)
+{
+	struct bpf_xdp_storage *xdp_store = xdp_storage_get();
+
+	if (!xdp_store)
+		return NULL;
+	return &xdp_store->cpu_map_flush_list;
+}
+
+static inline struct list_head *xdp_storage_get_dev_flush_list(void)
+{
+	struct bpf_xdp_storage *xdp_store = xdp_storage_get();
+
+	if (!xdp_store)
+		return NULL;
+	return &xdp_store->dev_map_flush_list;
+}
+
+static inline struct list_head *xdp_storage_get_xskmap_flush_list(void)
+{
+	struct bpf_xdp_storage *xdp_store = xdp_storage_get();
+
+	if (!xdp_store)
+		return NULL;
+	return &xdp_store->xskmap_map_flush_list;
+}
+
 #endif
 DEFINE_FREE(xdp_storage_clear, struct bpf_xdp_storage *, if (_T) xdp_storage_clear(_T));
 
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index c40ae831ab1a6..ec5be37399c82 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -78,8 +78,30 @@ struct bpf_cpu_map {
 	struct bpf_cpu_map_entry __rcu **cpu_map;
 };
 
+#ifndef CONFIG_PREEMPT_RT
 static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
 
+static struct list_head *xdp_storage_get_cpu_map_flush_list(void)
+{
+	return this_cpu_ptr(&cpu_map_flush_list);
+}
+
+static void init_cpu_map_flush_list(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
+}
+
+#else
+
+static void init_cpu_map_flush_list(void)
+{
+}
+
+#endif
+
 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 {
 	u32 value_size = attr->value_size;
@@ -703,7 +725,7 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
  */
 static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
-	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+	struct list_head *flush_list = xdp_storage_get_cpu_map_flush_list();
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
@@ -755,7 +777,7 @@ int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
 
 void __cpu_map_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+	struct list_head *flush_list = xdp_storage_get_cpu_map_flush_list();
 	struct xdp_bulk_queue *bq, *tmp;
 
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -769,7 +791,7 @@ void __cpu_map_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool cpu_map_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&cpu_map_flush_list)))
+	if (list_empty(xdp_storage_get_cpu_map_flush_list()))
 		return false;
 	__cpu_map_flush();
 	return true;
@@ -778,10 +800,7 @@ bool cpu_map_check_flush(void)
 
 static int __init cpu_map_init(void)
 {
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
+	init_cpu_map_flush_list();
 	return 0;
 }
 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a936c704d4e77..c1af5d9d60381 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -83,7 +83,29 @@ struct bpf_dtab {
 	u32 n_buckets;
 };
 
+#ifndef CONFIG_PREEMPT_RT
 static DEFINE_PER_CPU(struct list_head, dev_flush_list);
+
+static struct list_head *xdp_storage_get_dev_flush_list(void)
+{
+	return this_cpu_ptr(&dev_flush_list);
+}
+
+static void init_dev_flush_list(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
+}
+
+#else
+
+static void init_dev_flush_list(void)
+{
+}
+#endif
+
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
@@ -407,7 +429,7 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
  */
 void __dev_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+	struct list_head *flush_list = xdp_storage_get_dev_flush_list();
 	struct xdp_dev_bulk_queue *bq, *tmp;
 
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -421,7 +443,7 @@ void __dev_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool dev_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&dev_flush_list)))
+	if (list_empty(xdp_storage_get_dev_flush_list()))
 		return false;
 	__dev_flush();
 	return true;
@@ -452,7 +474,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
-	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+	struct list_head *flush_list = xdp_storage_get_dev_flush_list();
 	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
@@ -1155,15 +1177,12 @@ static struct notifier_block dev_map_notifier = {
 
 static int __init dev_map_init(void)
 {
-	int cpu;
-
 	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
 	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
 		     offsetof(struct _bpf_dtab_netdev, dev));
 	register_netdevice_notifier(&dev_map_notifier);
 
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
+	init_dev_flush_list();
 	return 0;
 }
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b78c0e095e221..3050739cfe1e0 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,8 +35,30 @@
 #define TX_BATCH_SIZE 32
 #define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
 
+#ifndef CONFIG_PREEMPT_RT
 static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
 
+static struct list_head *xdp_storage_get_xskmap_flush_list(void)
+{
+	return this_cpu_ptr(&xskmap_flush_list);
+}
+
+static void init_xskmap_flush_list(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
+}
+
+#else
+
+static void init_xskmap_flush_list(void)
+{
+}
+
+#endif
+
 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -372,7 +394,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+	struct list_head *flush_list = xdp_storage_get_xskmap_flush_list();
 	int err;
 
 	err = xsk_rcv(xs, xdp);
@@ -387,7 +409,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 void __xsk_map_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+	struct list_head *flush_list = xdp_storage_get_xskmap_flush_list();
 	struct xdp_sock *xs, *tmp;
 
 	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -399,7 +421,7 @@ void __xsk_map_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool xsk_map_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
+	if (list_empty(xdp_storage_get_xskmap_flush_list()))
 		return false;
 	__xsk_map_flush();
 	return true;
@@ -1770,7 +1792,7 @@ static struct pernet_operations xsk_net_ops = {
 
 static int __init xsk_init(void)
 {
-	int err, cpu;
+	int err;
 
 	err = proto_register(&xsk_proto, 0 /* no slab */);
 	if (err)
@@ -1788,8 +1810,7 @@ static int __init xsk_init(void)
 	if (err)
 		goto out_pernet;
 
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
+	init_xskmap_flush_list();
 	return 0;
 
 out_pernet:
-- 
2.43.0


      parent reply	other threads:[~2024-02-13 14:59 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-13 14:58 [PATCH RFC net-next 0/2] Use per-task storage for XDP-redirects on PREEMPT_RT Sebastian Andrzej Siewior
2024-02-13 14:58 ` [PATCH RFC net-next 1/2] net: Reference bpf_redirect_info via task_struct " Sebastian Andrzej Siewior
2024-02-13 20:50   ` Jesper Dangaard Brouer
2024-02-14 12:19     ` Sebastian Andrzej Siewior
2024-02-14 13:23       ` Toke Høiland-Jørgensen
2024-02-14 14:28         ` Sebastian Andrzej Siewior
2024-02-14 16:08           ` Toke Høiland-Jørgensen
2024-02-14 16:36             ` Sebastian Andrzej Siewior
2024-02-15 20:23               ` Toke Høiland-Jørgensen
2024-02-16 16:57                 ` Sebastian Andrzej Siewior
2024-02-19 19:01                   ` Toke Høiland-Jørgensen
2024-02-20  9:17                     ` Jesper Dangaard Brouer
2024-02-20 10:17                       ` Sebastian Andrzej Siewior
2024-02-20 10:42                         ` Jesper Dangaard Brouer
2024-02-20 12:08                           ` Sebastian Andrzej Siewior
2024-02-20 12:57                             ` Jesper Dangaard Brouer
2024-02-20 15:32                               ` Sebastian Andrzej Siewior
2024-02-22  9:22                                 ` Sebastian Andrzej Siewior
2024-02-22 10:10                                   ` Jesper Dangaard Brouer
2024-02-22 10:58                                     ` Sebastian Andrzej Siewior
2024-02-20 12:10                           ` Dave Taht
2024-02-14 16:13   ` Toke Høiland-Jørgensen
2024-02-15  9:04     ` Sebastian Andrzej Siewior
2024-02-15 12:11       ` Toke Høiland-Jørgensen
2024-02-13 14:58 ` Sebastian Andrzej Siewior [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240213145923.2552753-3-bigeasy@linutronix.de \
    --to=bigeasy@linutronix.de \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=haoluo@google.com \
    --cc=hawk@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=jonathan.lemon@gmail.com \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=martin.lau@linux.dev \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=peterz@infradead.org \
    --cc=sdf@google.com \
    --cc=song@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).