BPF Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH bpf-next v3 0/2] Optimize bpf_redirect_map()/xdp_do_redirect()
@ 2021-02-21 20:09 Björn Töpel
  2021-02-21 20:09 ` [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP Björn Töpel
  2021-02-21 20:09 ` [PATCH bpf-next v3 2/2] bpf, xdp: restructure redirect actions Björn Töpel
  0 siblings, 2 replies; 7+ messages in thread
From: Björn Töpel @ 2021-02-21 20:09 UTC (permalink / raw)
  To: ast, daniel, netdev, bpf
  Cc: Björn Töpel, bjorn.topel, maciej.fijalkowski, hawk,
	toke, magnus.karlsson, john.fastabend, kuba, davem

Hi XDP-folks,

This two patch series contain two optimizations for the
bpf_redirect_map() helper and the xdp_do_redirect() function.

The bpf_redirect_map() optimization is about avoiding the map lookup
dispatching. Instead of having a switch-statement and selecting the
correct lookup function, we let the verifier patch the
bpf_redirect_map() call to a specific lookup function. This way the
run-time lookup is avoided.

The xdp_do_redirect() patch restructures the code, so that the map
pointer indirection can be avoided.

Performance-wise I got 3% improvement for XSKMAP
(sample:xdpsock/rx-drop), and 4% (sample:xdp_redirect_map) on my
machine.

More details in each commit. Changes since the RFC is outlined in each
commit.


Cheers,
Björn


Björn Töpel (2):
  bpf, xdp: per-map bpf_redirect_map functions for XDP
  bpf, xdp: restructure redirect actions

 include/linux/bpf.h        |  21 ++--
 include/linux/filter.h     |  20 +++-
 include/net/xdp_sock.h     |   6 +-
 include/trace/events/xdp.h |  66 +++++++-----
 kernel/bpf/cpumap.c        |   3 +-
 kernel/bpf/devmap.c        |   5 +-
 kernel/bpf/verifier.c      |  17 ++-
 net/core/filter.c          | 216 ++++++++++++++++++-------------------
 net/xdp/xskmap.c           |   1 -
 9 files changed, 195 insertions(+), 160 deletions(-)


base-commit: 7b1e385c9a488de9291eaaa412146d3972e9dec5
-- 
2.27.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP
  2021-02-21 20:09 [PATCH bpf-next v3 0/2] Optimize bpf_redirect_map()/xdp_do_redirect() Björn Töpel
@ 2021-02-21 20:09 ` Björn Töpel
  2021-02-22  7:23   ` Jesper Dangaard Brouer
  2021-02-24 23:38   ` Daniel Borkmann
  2021-02-21 20:09 ` [PATCH bpf-next v3 2/2] bpf, xdp: restructure redirect actions Björn Töpel
  1 sibling, 2 replies; 7+ messages in thread
From: Björn Töpel @ 2021-02-21 20:09 UTC (permalink / raw)
  To: ast, daniel, netdev, bpf
  Cc: Björn Töpel, maciej.fijalkowski, hawk, toke,
	magnus.karlsson, john.fastabend, kuba, davem

From: Björn Töpel <bjorn.topel@intel.com>

Currently the bpf_redirect_map() implementation dispatches to the
correct map-lookup function via a switch-statement. To avoid the
dispatching, this change adds one bpf_redirect_map() implementation per
map. Correct function is automatically selected by the BPF verifier.

v2->v3 : Fix build when CONFIG_NET is not set. (lkp)
v1->v2 : Re-added comment. (Toke)
rfc->v1: Get rid of the macro and use __always_inline. (Jesper)

Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/linux/bpf.h    | 20 +++++++-------
 include/linux/filter.h |  9 +++++++
 include/net/xdp_sock.h |  6 ++---
 kernel/bpf/cpumap.c    |  2 +-
 kernel/bpf/devmap.c    |  4 +--
 kernel/bpf/verifier.c  | 17 ++++++++----
 net/core/filter.c      | 61 ++++++++++++++++++++++++++++--------------
 7 files changed, 78 insertions(+), 41 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cccaef1088ea..3dd186eeaf98 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -314,12 +314,14 @@ enum bpf_return_type {
 	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
 };
 
+typedef u64 (*bpf_func_proto_func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
  * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
  * instructions after verifying
  */
 struct bpf_func_proto {
-	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+	bpf_func_proto_func func;
 	bool gpl_only;
 	bool pkt_access;
 	enum bpf_return_type ret_type;
@@ -1429,9 +1431,11 @@ struct btf *bpf_get_btf_vmlinux(void);
 /* Map specifics */
 struct xdp_buff;
 struct sk_buff;
+struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
 
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
-struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
+void *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_flush(void);
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -1441,7 +1445,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
 bool dev_map_can_have_prog(struct bpf_map *map);
 
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
+void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -1568,14 +1572,12 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
 	return -EOPNOTSUPP;
 }
 
-static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
-						       u32 key)
+static inline void  *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	return NULL;
 }
 
-static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map,
-							     u32 key)
+static inline void  *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
 {
 	return NULL;
 }
@@ -1615,7 +1617,7 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
 }
 
 static inline
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
+void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	return NULL;
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3b00fc906ccd..bc2a1ec20d0b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1472,4 +1472,13 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 }
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
+#ifdef CONFIG_NET
+bpf_func_proto_func get_xdp_redirect_func(enum bpf_map_type map_type);
+#else
+static inline bpf_func_proto_func get_xdp_redirect_func(enum bpf_map_type map_type)
+{
+	return NULL;
+}
+#endif
+
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index cc17bc957548..da4139a58630 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -80,8 +80,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 void __xsk_map_flush(void);
 
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
+static inline void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
 	struct xdp_sock *xs;
@@ -109,8 +108,7 @@ static inline void __xsk_map_flush(void)
 {
 }
 
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
+static inline void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	return NULL;
 }
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 5d1469de6921..a4d2cb93cd69 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -563,7 +563,7 @@ static void cpu_map_free(struct bpf_map *map)
 	kfree(cmap);
 }
 
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
+void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
 	struct bpf_cpu_map_entry *rcpu;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 85d9d1b72a33..37ac4cde9713 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -258,7 +258,7 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
-struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
+void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct hlist_head *head = dev_map_index_hash(dtab, key);
@@ -392,7 +392,7 @@ void __dev_flush(void)
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
  */
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *obj;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3d34ba492d46..89ccc10c6348 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5409,7 +5409,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	    func_id != BPF_FUNC_map_delete_elem &&
 	    func_id != BPF_FUNC_map_push_elem &&
 	    func_id != BPF_FUNC_map_pop_elem &&
-	    func_id != BPF_FUNC_map_peek_elem)
+	    func_id != BPF_FUNC_map_peek_elem &&
+	    func_id != BPF_FUNC_redirect_map)
 		return 0;
 
 	if (map == NULL) {
@@ -11545,12 +11546,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 	struct bpf_prog *prog = env->prog;
 	bool expect_blinding = bpf_jit_blinding_enabled(prog);
 	struct bpf_insn *insn = prog->insnsi;
-	const struct bpf_func_proto *fn;
 	const int insn_cnt = prog->len;
 	const struct bpf_map_ops *ops;
 	struct bpf_insn_aux_data *aux;
 	struct bpf_insn insn_buf[16];
 	struct bpf_prog *new_prog;
+	bpf_func_proto_func func;
 	struct bpf_map *map_ptr;
 	int i, ret, cnt, delta = 0;
 
@@ -11860,17 +11861,23 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		}
 
 patch_call_imm:
-		fn = env->ops->get_func_proto(insn->imm, env->prog);
+		if (insn->imm == BPF_FUNC_redirect_map) {
+			aux = &env->insn_aux_data[i];
+			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
+			func = get_xdp_redirect_func(map_ptr->map_type);
+		} else {
+			func = env->ops->get_func_proto(insn->imm, env->prog)->func;
+		}
 		/* all functions that have prototype and verifier allowed
 		 * programs to call them, must be real in-kernel functions
 		 */
-		if (!fn->func) {
+		if (!func) {
 			verbose(env,
 				"kernel subsystem misconfigured func %s#%d\n",
 				func_id_name(insn->imm), insn->imm);
 			return -EFAULT;
 		}
-		insn->imm = fn->func - __bpf_call_base;
+		insn->imm = func - __bpf_call_base;
 	}
 
 	/* Since poke tab is now finalized, publish aux to tracker. */
diff --git a/net/core/filter.c b/net/core/filter.c
index adfdad234674..502e7856f107 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3944,22 +3944,6 @@ void xdp_do_flush(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 
-static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
-{
-	switch (map->map_type) {
-	case BPF_MAP_TYPE_DEVMAP:
-		return __dev_map_lookup_elem(map, index);
-	case BPF_MAP_TYPE_DEVMAP_HASH:
-		return __dev_map_hash_lookup_elem(map, index);
-	case BPF_MAP_TYPE_CPUMAP:
-		return __cpu_map_lookup_elem(map, index);
-	case BPF_MAP_TYPE_XSKMAP:
-		return __xsk_map_lookup_elem(map, index);
-	default:
-		return NULL;
-	}
-}
-
 void bpf_clear_redirect_map(struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri;
@@ -4110,8 +4094,9 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
-	   u64, flags)
+static __always_inline s64 __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+						  void *lookup_elem(struct bpf_map *map,
+								    u32 key))
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
@@ -4119,7 +4104,7 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
 	if (unlikely(flags > XDP_TX))
 		return XDP_ABORTED;
 
-	ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
+	ri->tgt_value = lookup_elem(map, ifindex);
 	if (unlikely(!ri->tgt_value)) {
 		/* If the lookup fails we want to clear out the state in the
 		 * redirect_info struct completely, so that if an eBPF program
@@ -4137,8 +4122,44 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
 	return XDP_REDIRECT;
 }
 
+BPF_CALL_3(bpf_xdp_redirect_devmap, struct bpf_map *, map, u32, ifindex, u64, flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
+}
+
+BPF_CALL_3(bpf_xdp_redirect_devmap_hash, struct bpf_map *, map, u32, ifindex, u64, flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
+}
+
+BPF_CALL_3(bpf_xdp_redirect_cpumap, struct bpf_map *, map, u32, ifindex, u64, flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
+}
+
+BPF_CALL_3(bpf_xdp_redirect_xskmap, struct bpf_map *, map, u32, ifindex, u64, flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
+}
+
+bpf_func_proto_func get_xdp_redirect_func(enum bpf_map_type map_type)
+{
+	switch (map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		return bpf_xdp_redirect_devmap;
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		return bpf_xdp_redirect_devmap_hash;
+	case BPF_MAP_TYPE_CPUMAP:
+		return bpf_xdp_redirect_cpumap;
+	case BPF_MAP_TYPE_XSKMAP:
+		return bpf_xdp_redirect_xskmap;
+	default:
+		return NULL;
+	}
+}
+
+/* NB! .func is NULL! get_xdp_redirect_func() is used instead! */
 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
-	.func           = bpf_xdp_redirect_map,
 	.gpl_only       = false,
 	.ret_type       = RET_INTEGER,
 	.arg1_type      = ARG_CONST_MAP_PTR,
-- 
2.27.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH bpf-next v3 2/2] bpf, xdp: restructure redirect actions
  2021-02-21 20:09 [PATCH bpf-next v3 0/2] Optimize bpf_redirect_map()/xdp_do_redirect() Björn Töpel
  2021-02-21 20:09 ` [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP Björn Töpel
@ 2021-02-21 20:09 ` Björn Töpel
  2021-02-22  8:10   ` Jesper Dangaard Brouer
  1 sibling, 1 reply; 7+ messages in thread
From: Björn Töpel @ 2021-02-21 20:09 UTC (permalink / raw)
  To: ast, daniel, netdev, bpf
  Cc: Björn Töpel, maciej.fijalkowski, hawk, toke,
	magnus.karlsson, john.fastabend, kuba, davem

From: Björn Töpel <bjorn.topel@intel.com>

The XDP_REDIRECT implementations for maps and non-maps are fairly
similar, but obviously need to take different code paths depending on
if the target is using a map or not. Today, the redirect targets for
XDP either uses a map, or is based on ifindex.

Here, an explicit redirect type is added to bpf_redirect_info, instead
of the actual map. Redirect type, map item/ifindex, and the map_id (if
any) is passed to xdp_do_redirect().

In addition to making the code easier to follow, using an explicit
type in bpf_redirect_info has a slight positive performance impact by
avoiding a pointer indirection for the map type lookup, and instead
use the cacheline for bpf_redirect_info.

Since the actual map is not passed via bpf_redirect_info anymore, the
map lookup is only done in the BPF helper. This means that the
bpf_clear_redirect_map() function can be removed. The actual map item
is RCU protected.

The bpf_redirect_info flags member is not used by XDP, and not
read/written any more. The map member is only written to when
required/used, and not unconditionally.

v1->v2 : Removed warning when CONFIG_BPF_SYSCALL was not set. (lkp)
       : Cleaned up case-clause in xdp_do_generic_redirect_map(). (Toke)
rfc->v1: Use map_id, and remove bpf_clear_redirect_map(). (Toke)

Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/linux/bpf.h        |   1 +
 include/linux/filter.h     |  11 ++-
 include/trace/events/xdp.h |  66 +++++++++------
 kernel/bpf/cpumap.c        |   1 -
 kernel/bpf/devmap.c        |   1 -
 net/core/filter.c          | 165 ++++++++++++++++---------------------
 net/xdp/xskmap.c           |   1 -
 7 files changed, 122 insertions(+), 124 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3dd186eeaf98..3e59e4c211bc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1592,6 +1592,7 @@ static inline void __dev_flush(void)
 
 struct xdp_buff;
 struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
 
 static inline
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index bc2a1ec20d0b..54406a7eef2b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -646,11 +646,20 @@ struct bpf_redirect_info {
 	u32 flags;
 	u32 tgt_index;
 	void *tgt_value;
-	struct bpf_map *map;
+	u32 map_id;
+	u32 tgt_type;
 	u32 kern_flags;
 	struct bpf_nh_params nh;
 };
 
+enum xdp_redirect_type {
+	XDP_REDIR_UNSET,
+	XDP_REDIR_DEV_IFINDEX,
+	XDP_REDIR_DEV_MAP,
+	XDP_REDIR_CPU_MAP,
+	XDP_REDIR_XSK_MAP,
+};
+
 DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
 
 /* flags for bpf_redirect_info kern_flags */
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 76a97176ab81..538321735447 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
 };
 #endif /* __DEVMAP_OBJ_TYPE */
 
-#define devmap_ifindex(tgt, map)				\
-	(((map->map_type == BPF_MAP_TYPE_DEVMAP ||	\
-		  map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
-	  ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
-
 DECLARE_EVENT_CLASS(xdp_redirect_template,
 
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
+		 enum xdp_redirect_type type,
+		 const struct bpf_redirect_info *ri),
 
-	TP_ARGS(dev, xdp, tgt, err, map, index),
+	TP_ARGS(dev, xdp, tgt, err, type, ri),
 
 	TP_STRUCT__entry(
 		__field(int, prog_id)
@@ -111,14 +107,30 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
 	),
 
 	TP_fast_assign(
+		u32 ifindex = 0, map_id = 0, index = ri->tgt_index;
+
+		switch (type) {
+		case XDP_REDIR_DEV_MAP:
+			ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
+			fallthrough;
+		case XDP_REDIR_CPU_MAP:
+		case XDP_REDIR_XSK_MAP:
+			map_id = ri->map_id;
+			break;
+		case XDP_REDIR_DEV_IFINDEX:
+			ifindex = (u32)(long)tgt;
+			break;
+		default:
+			break;
+		}
+
 		__entry->prog_id	= xdp->aux->id;
 		__entry->act		= XDP_REDIRECT;
 		__entry->ifindex	= dev->ifindex;
 		__entry->err		= err;
-		__entry->to_ifindex	= map ? devmap_ifindex(tgt, map) :
-						index;
-		__entry->map_id		= map ? map->id : 0;
-		__entry->map_index	= map ? index : 0;
+		__entry->to_ifindex	= ifindex;
+		__entry->map_id		= map_id;
+		__entry->map_index	= index;
 	),
 
 	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
@@ -133,45 +145,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum xdp_redirect_type type,
+		 const struct bpf_redirect_info *ri),
+	TP_ARGS(dev, xdp, tgt, err, type, ri)
 );
 
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum xdp_redirect_type type,
+		 const struct bpf_redirect_info *ri),
+	TP_ARGS(dev, xdp, tgt, err, type, ri)
 );
 
 #define _trace_xdp_redirect(dev, xdp, to)				\
-	 trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
+	trace_xdp_redirect(dev, xdp, NULL, 0, XDP_REDIR_DEV_IFINDEX, NULL)
 
 #define _trace_xdp_redirect_err(dev, xdp, to, err)			\
-	 trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
+	trace_xdp_redirect_err(dev, xdp, NULL, err, XDP_REDIR_DEV_IFINDEX, NULL)
 
-#define _trace_xdp_redirect_map(dev, xdp, to, map, index)		\
-	 trace_xdp_redirect(dev, xdp, to, 0, map, index)
+#define _trace_xdp_redirect_map(dev, xdp, to, type, ri)		\
+	trace_xdp_redirect(dev, xdp, to, 0, type, ri)
 
-#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err)	\
-	 trace_xdp_redirect_err(dev, xdp, to, err, map, index)
+#define _trace_xdp_redirect_map_err(dev, xdp, to, type, ri, err)	\
+	trace_xdp_redirect_err(dev, xdp, to, err, type, ri)
 
 /* not used anymore, but kept around so as not to break old programs */
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum xdp_redirect_type type,
+		 const struct bpf_redirect_info *ri),
+	TP_ARGS(dev, xdp, tgt, err, type, ri)
 );
 
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum xdp_redirect_type type,
+		 const struct bpf_redirect_info *ri),
+	TP_ARGS(dev, xdp, tgt, err, type, ri)
 );
 
 TRACE_EVENT(xdp_cpumap_kthread,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index a4d2cb93cd69..b7f4d22f5c8d 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map)
 	 * complete.
 	 */
 
-	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* For cpu_map the remote CPUs can still be using the entries
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 37ac4cde9713..b5681a98020d 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map)
 	list_del_rcu(&dtab->list);
 	spin_unlock(&dev_map_lock);
 
-	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* Make sure prior __dev_map_entry_free() have completed. */
diff --git a/net/core/filter.c b/net/core/filter.c
index 502e7856f107..5a08e49f776c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3919,23 +3919,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
-			    struct bpf_map *map, struct xdp_buff *xdp)
-{
-	switch (map->map_type) {
-	case BPF_MAP_TYPE_DEVMAP:
-	case BPF_MAP_TYPE_DEVMAP_HASH:
-		return dev_map_enqueue(fwd, xdp, dev_rx);
-	case BPF_MAP_TYPE_CPUMAP:
-		return cpu_map_enqueue(fwd, xdp, dev_rx);
-	case BPF_MAP_TYPE_XSKMAP:
-		return __xsk_map_redirect(fwd, xdp);
-	default:
-		return -EBADRQC;
-	}
-	return 0;
-}
-
 void xdp_do_flush(void)
 {
 	__dev_flush();
@@ -3944,55 +3927,45 @@ void xdp_do_flush(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 
-void bpf_clear_redirect_map(struct bpf_map *map)
-{
-	struct bpf_redirect_info *ri;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
-		/* Avoid polluting remote cacheline due to writes if
-		 * not needed. Once we pass this test, we need the
-		 * cmpxchg() to make sure it hasn't been changed in
-		 * the meantime by remote CPU.
-		 */
-		if (unlikely(READ_ONCE(ri->map) == map))
-			cmpxchg(&ri->map, map, NULL);
-	}
-}
-
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	struct bpf_map *map = READ_ONCE(ri->map);
-	u32 index = ri->tgt_index;
+	enum xdp_redirect_type type = ri->tgt_type;
 	void *fwd = ri->tgt_value;
 	int err;
 
-	ri->tgt_index = 0;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
+	ri->tgt_type = XDP_REDIR_UNSET;
 
-	if (unlikely(!map)) {
-		fwd = dev_get_by_index_rcu(dev_net(dev), index);
+	switch (type) {
+	case XDP_REDIR_DEV_IFINDEX:
+		fwd = dev_get_by_index_rcu(dev_net(dev), (u32)(long)fwd);
 		if (unlikely(!fwd)) {
 			err = -EINVAL;
-			goto err;
+			break;
 		}
-
 		err = dev_xdp_enqueue(fwd, xdp, dev);
-	} else {
-		err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
+		break;
+	case XDP_REDIR_DEV_MAP:
+		err = dev_map_enqueue(fwd, xdp, dev);
+		break;
+	case XDP_REDIR_CPU_MAP:
+		err = cpu_map_enqueue(fwd, xdp, dev);
+		break;
+	case XDP_REDIR_XSK_MAP:
+		err = __xsk_map_redirect(fwd, xdp);
+		break;
+	default:
+		err = -EBADRQC;
 	}
 
 	if (unlikely(err))
 		goto err;
 
-	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, type, ri);
 	return 0;
 err:
-	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, type, ri, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -4001,41 +3974,37 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
 				       struct bpf_prog *xdp_prog,
-				       struct bpf_map *map)
+				       void *fwd,
+				       enum xdp_redirect_type type)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	u32 index = ri->tgt_index;
-	void *fwd = ri->tgt_value;
-	int err = 0;
-
-	ri->tgt_index = 0;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
-
-	if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
-	    map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
-		struct bpf_dtab_netdev *dst = fwd;
+	int err;
 
-		err = dev_map_generic_redirect(dst, skb, xdp_prog);
+	switch (type) {
+	case XDP_REDIR_DEV_MAP:
+		err = dev_map_generic_redirect(fwd, skb, xdp_prog);
 		if (unlikely(err))
 			goto err;
-	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
+		break;
+	case XDP_REDIR_XSK_MAP: {
 		struct xdp_sock *xs = fwd;
 
 		err = xsk_generic_rcv(xs, xdp);
 		if (err)
 			goto err;
 		consume_skb(skb);
-	} else {
+		break;
+	}
+	default:
 		/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
 		err = -EBADRQC;
 		goto err;
 	}
 
-	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, type, ri);
 	return 0;
 err:
-	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, type, ri, err);
 	return err;
 }
 
@@ -4043,29 +4012,31 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	struct bpf_map *map = READ_ONCE(ri->map);
-	u32 index = ri->tgt_index;
-	struct net_device *fwd;
+	enum xdp_redirect_type type = ri->tgt_type;
+	void *fwd = ri->tgt_value;
 	int err = 0;
 
-	if (map)
-		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
-						   map);
-	ri->tgt_index = 0;
-	fwd = dev_get_by_index_rcu(dev_net(dev), index);
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
+	ri->tgt_type = XDP_REDIR_UNSET;
+	ri->tgt_value = NULL;
 
-	err = xdp_ok_fwd_dev(fwd, skb->len);
-	if (unlikely(err))
-		goto err;
+	if (type == XDP_REDIR_DEV_IFINDEX) {
+		fwd = dev_get_by_index_rcu(dev_net(dev), (u32)(long)fwd);
+		if (unlikely(!fwd)) {
+			err = -EINVAL;
+			goto err;
+		}
 
-	skb->dev = fwd;
-	_trace_xdp_redirect(dev, xdp_prog, index);
-	generic_xdp_tx(skb, xdp_prog);
-	return 0;
+		err = xdp_ok_fwd_dev(fwd, skb->len);
+		if (unlikely(err))
+			goto err;
+
+		skb->dev = fwd;
+		_trace_xdp_redirect(dev, xdp_prog, index);
+		generic_xdp_tx(skb, xdp_prog);
+		return 0;
+	}
+
+	return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, type);
 err:
 	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
 	return err;
@@ -4078,10 +4049,9 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 	if (unlikely(flags))
 		return XDP_ABORTED;
 
-	ri->flags = flags;
-	ri->tgt_index = ifindex;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
+	ri->tgt_type = XDP_REDIR_DEV_IFINDEX;
+	ri->tgt_index = 0;
+	ri->tgt_value = (void *)(long)ifindex;
 
 	return XDP_REDIRECT;
 }
@@ -4096,7 +4066,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 
 static __always_inline s64 __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
 						  void *lookup_elem(struct bpf_map *map,
-								    u32 key))
+								    u32 key),
+						  enum xdp_redirect_type type)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
@@ -4111,35 +4082,39 @@ static __always_inline s64 __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
 		 * performs multiple lookups, the last one always takes
 		 * precedence.
 		 */
-		WRITE_ONCE(ri->map, NULL);
+		ri->tgt_type = XDP_REDIR_UNSET;
 		return flags;
 	}
 
-	ri->flags = flags;
 	ri->tgt_index = ifindex;
-	WRITE_ONCE(ri->map, map);
+	ri->tgt_type = type;
+	ri->map_id = map->id;
 
 	return XDP_REDIRECT;
 }
 
 BPF_CALL_3(bpf_xdp_redirect_devmap, struct bpf_map *, map, u32, ifindex, u64, flags)
 {
-	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem,
+				      XDP_REDIR_DEV_MAP);
 }
 
 BPF_CALL_3(bpf_xdp_redirect_devmap_hash, struct bpf_map *, map, u32, ifindex, u64, flags)
 {
-	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem,
+				      XDP_REDIR_DEV_MAP);
 }
 
 BPF_CALL_3(bpf_xdp_redirect_cpumap, struct bpf_map *, map, u32, ifindex, u64, flags)
 {
-	return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem,
+				      XDP_REDIR_CPU_MAP);
 }
 
 BPF_CALL_3(bpf_xdp_redirect_xskmap, struct bpf_map *, map, u32, ifindex, u64, flags)
 {
-	return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem,
+				      XDP_REDIR_XSK_MAP);
 }
 
 bpf_func_proto_func get_xdp_redirect_func(enum bpf_map_type map_type)
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 113fd9017203..c285d3dd04ad 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -87,7 +87,6 @@ static void xsk_map_free(struct bpf_map *map)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
 
-	bpf_clear_redirect_map(map);
 	synchronize_net();
 	bpf_map_area_free(m);
 }
-- 
2.27.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP
  2021-02-21 20:09 ` [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP Björn Töpel
@ 2021-02-22  7:23   ` Jesper Dangaard Brouer
  2021-02-24 23:38   ` Daniel Borkmann
  1 sibling, 0 replies; 7+ messages in thread
From: Jesper Dangaard Brouer @ 2021-02-22  7:23 UTC (permalink / raw)
  To: Björn Töpel
  Cc: brouer, ast, daniel, netdev, bpf, Björn Töpel,
	maciej.fijalkowski, hawk, toke, magnus.karlsson, john.fastabend,
	kuba, davem

On Sun, 21 Feb 2021 21:09:53 +0100
Björn Töpel <bjorn.topel@gmail.com> wrote:

> From: Björn Töpel <bjorn.topel@intel.com>
> 
> Currently the bpf_redirect_map() implementation dispatches to the
> correct map-lookup function via a switch-statement. To avoid the
> dispatching, this change adds one bpf_redirect_map() implementation per
> map. Correct function is automatically selected by the BPF verifier.
> 
> v2->v3 : Fix build when CONFIG_NET is not set. (lkp)
> v1->v2 : Re-added comment. (Toke)
> rfc->v1: Get rid of the macro and use __always_inline. (Jesper)
> 
> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
>  include/linux/bpf.h    | 20 +++++++-------
>  include/linux/filter.h |  9 +++++++
>  include/net/xdp_sock.h |  6 ++---
>  kernel/bpf/cpumap.c    |  2 +-
>  kernel/bpf/devmap.c    |  4 +--
>  kernel/bpf/verifier.c  | 17 ++++++++----
>  net/core/filter.c      | 61 ++++++++++++++++++++++++++++--------------
>  7 files changed, 78 insertions(+), 41 deletions(-)

Love it! :-)

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v3 2/2] bpf, xdp: restructure redirect actions
  2021-02-21 20:09 ` [PATCH bpf-next v3 2/2] bpf, xdp: restructure redirect actions Björn Töpel
@ 2021-02-22  8:10   ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 7+ messages in thread
From: Jesper Dangaard Brouer @ 2021-02-22  8:10 UTC (permalink / raw)
  To: Björn Töpel
  Cc: brouer, ast, daniel, netdev, bpf, Björn Töpel,
	maciej.fijalkowski, hawk, toke, magnus.karlsson, john.fastabend,
	kuba, davem

On Sun, 21 Feb 2021 21:09:54 +0100
Björn Töpel <bjorn.topel@gmail.com> wrote:

> From: Björn Töpel <bjorn.topel@intel.com>
> 
> The XDP_REDIRECT implementations for maps and non-maps are fairly
> similar, but obviously need to take different code paths depending on
> if the target is using a map or not. Today, the redirect targets for
> XDP either uses a map, or is based on ifindex.
> 
> Here, an explicit redirect type is added to bpf_redirect_info, instead
> of the actual map. Redirect type, map item/ifindex, and the map_id (if
> any) is passed to xdp_do_redirect().
> 
> In addition to making the code easier to follow, using an explicit
> type in bpf_redirect_info has a slight positive performance impact by
> avoiding a pointer indirection for the map type lookup, and instead
> use the cacheline for bpf_redirect_info.
> 
> Since the actual map is not passed via bpf_redirect_info anymore, the
> map lookup is only done in the BPF helper. This means that the
> bpf_clear_redirect_map() function can be removed. The actual map item
> is RCU protected.
> 
> The bpf_redirect_info flags member is not used by XDP, and not
> read/written any more. The map member is only written to when
> required/used, and not unconditionally.
> 
> v1->v2 : Removed warning when CONFIG_BPF_SYSCALL was not set. (lkp)
>        : Cleaned up case-clause in xdp_do_generic_redirect_map(). (Toke)
> rfc->v1: Use map_id, and remove bpf_clear_redirect_map(). (Toke)
> 
> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
> Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
>  include/linux/bpf.h        |   1 +
>  include/linux/filter.h     |  11 ++-
>  include/trace/events/xdp.h |  66 +++++++++------
>  kernel/bpf/cpumap.c        |   1 -
>  kernel/bpf/devmap.c        |   1 -
>  net/core/filter.c          | 165 ++++++++++++++++---------------------
>  net/xdp/xskmap.c           |   1 -
>  7 files changed, 122 insertions(+), 124 deletions(-)

I like it! :-)

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP
  2021-02-21 20:09 ` [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP Björn Töpel
  2021-02-22  7:23   ` Jesper Dangaard Brouer
@ 2021-02-24 23:38   ` Daniel Borkmann
  2021-02-25  6:39     ` Björn Töpel
  1 sibling, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2021-02-24 23:38 UTC (permalink / raw)
  To: Björn Töpel, ast, netdev, bpf
  Cc: Björn Töpel, maciej.fijalkowski, hawk, toke,
	magnus.karlsson, john.fastabend, kuba, davem

On 2/21/21 9:09 PM, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> Currently the bpf_redirect_map() implementation dispatches to the
> correct map-lookup function via a switch-statement. To avoid the
> dispatching, this change adds one bpf_redirect_map() implementation per
> map. Correct function is automatically selected by the BPF verifier.
> 
> v2->v3 : Fix build when CONFIG_NET is not set. (lkp)
> v1->v2 : Re-added comment. (Toke)
> rfc->v1: Get rid of the macro and use __always_inline. (Jesper)
> 
> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>

[...]

> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 3d34ba492d46..89ccc10c6348 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -5409,7 +5409,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
>   	    func_id != BPF_FUNC_map_delete_elem &&
>   	    func_id != BPF_FUNC_map_push_elem &&
>   	    func_id != BPF_FUNC_map_pop_elem &&
> -	    func_id != BPF_FUNC_map_peek_elem)
> +	    func_id != BPF_FUNC_map_peek_elem &&
> +	    func_id != BPF_FUNC_redirect_map)
>   		return 0;
>   
>   	if (map == NULL) {
> @@ -11545,12 +11546,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
>   	struct bpf_prog *prog = env->prog;
>   	bool expect_blinding = bpf_jit_blinding_enabled(prog);
>   	struct bpf_insn *insn = prog->insnsi;
> -	const struct bpf_func_proto *fn;
>   	const int insn_cnt = prog->len;
>   	const struct bpf_map_ops *ops;
>   	struct bpf_insn_aux_data *aux;
>   	struct bpf_insn insn_buf[16];
>   	struct bpf_prog *new_prog;
> +	bpf_func_proto_func func;
>   	struct bpf_map *map_ptr;
>   	int i, ret, cnt, delta = 0;
>   
> @@ -11860,17 +11861,23 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
>   		}
>   
>   patch_call_imm:
> -		fn = env->ops->get_func_proto(insn->imm, env->prog);
> +		if (insn->imm == BPF_FUNC_redirect_map) {
> +			aux = &env->insn_aux_data[i];
> +			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
> +			func = get_xdp_redirect_func(map_ptr->map_type);

Nope, this is broken. :/ The map_ptr could be poisoned, so unconditionally fetching
map_ptr->map_type can crash the box for specially crafted BPF progs.

Also, given you add the related BPF_CALL_3() functions below, what is the reason
to not properly integrate this like the map ops near patch_map_ops_generic?

> +		} else {
> +			func = env->ops->get_func_proto(insn->imm, env->prog)->func;
> +		}
>   		/* all functions that have prototype and verifier allowed
>   		 * programs to call them, must be real in-kernel functions
>   		 */
> -		if (!fn->func) {
> +		if (!func) {
>   			verbose(env,
>   				"kernel subsystem misconfigured func %s#%d\n",
>   				func_id_name(insn->imm), insn->imm);
>   			return -EFAULT;
>   		}
> -		insn->imm = fn->func - __bpf_call_base;
> +		insn->imm = func - __bpf_call_base;
>   	}
>   
>   	/* Since poke tab is now finalized, publish aux to tracker. */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index adfdad234674..502e7856f107 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3944,22 +3944,6 @@ void xdp_do_flush(void)
>   }
>   EXPORT_SYMBOL_GPL(xdp_do_flush);
>   
> -static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
> -{
> -	switch (map->map_type) {
> -	case BPF_MAP_TYPE_DEVMAP:
> -		return __dev_map_lookup_elem(map, index);
> -	case BPF_MAP_TYPE_DEVMAP_HASH:
> -		return __dev_map_hash_lookup_elem(map, index);
> -	case BPF_MAP_TYPE_CPUMAP:
> -		return __cpu_map_lookup_elem(map, index);
> -	case BPF_MAP_TYPE_XSKMAP:
> -		return __xsk_map_lookup_elem(map, index);
> -	default:
> -		return NULL;
> -	}
> -}
> -
>   void bpf_clear_redirect_map(struct bpf_map *map)
>   {
>   	struct bpf_redirect_info *ri;
> @@ -4110,8 +4094,9 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
>   	.arg2_type      = ARG_ANYTHING,
>   };
>   
> -BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
> -	   u64, flags)
> +static __always_inline s64 __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
> +						  void *lookup_elem(struct bpf_map *map,
> +								    u32 key))
>   {
>   	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>   
> @@ -4119,7 +4104,7 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
>   	if (unlikely(flags > XDP_TX))
>   		return XDP_ABORTED;
>   
> -	ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
> +	ri->tgt_value = lookup_elem(map, ifindex);
>   	if (unlikely(!ri->tgt_value)) {
>   		/* If the lookup fails we want to clear out the state in the
>   		 * redirect_info struct completely, so that if an eBPF program
> @@ -4137,8 +4122,44 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
>   	return XDP_REDIRECT;
>   }
>   
> +BPF_CALL_3(bpf_xdp_redirect_devmap, struct bpf_map *, map, u32, ifindex, u64, flags)
> +{
> +	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
> +}
> +
> +BPF_CALL_3(bpf_xdp_redirect_devmap_hash, struct bpf_map *, map, u32, ifindex, u64, flags)
> +{
> +	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
> +}
> +
> +BPF_CALL_3(bpf_xdp_redirect_cpumap, struct bpf_map *, map, u32, ifindex, u64, flags)
> +{
> +	return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
> +}
> +
> +BPF_CALL_3(bpf_xdp_redirect_xskmap, struct bpf_map *, map, u32, ifindex, u64, flags)
> +{
> +	return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
> +}
> +
> +bpf_func_proto_func get_xdp_redirect_func(enum bpf_map_type map_type)
> +{
> +	switch (map_type) {
> +	case BPF_MAP_TYPE_DEVMAP:
> +		return bpf_xdp_redirect_devmap;
> +	case BPF_MAP_TYPE_DEVMAP_HASH:
> +		return bpf_xdp_redirect_devmap_hash;
> +	case BPF_MAP_TYPE_CPUMAP:
> +		return bpf_xdp_redirect_cpumap;
> +	case BPF_MAP_TYPE_XSKMAP:
> +		return bpf_xdp_redirect_xskmap;
> +	default:
> +		return NULL;
> +	}
> +}
> +
> +/* NB! .func is NULL! get_xdp_redirect_func() is used instead! */
>   static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
> -	.func           = bpf_xdp_redirect_map,
>   	.gpl_only       = false,
>   	.ret_type       = RET_INTEGER,
>   	.arg1_type      = ARG_CONST_MAP_PTR,
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP
  2021-02-24 23:38   ` Daniel Borkmann
@ 2021-02-25  6:39     ` Björn Töpel
  0 siblings, 0 replies; 7+ messages in thread
From: Björn Töpel @ 2021-02-25  6:39 UTC (permalink / raw)
  To: Daniel Borkmann, Björn Töpel, ast, netdev, bpf
  Cc: maciej.fijalkowski, hawk, toke, magnus.karlsson, john.fastabend,
	kuba, davem

On 2021-02-25 00:38, Daniel Borkmann wrote:
> On 2/21/21 9:09 PM, Björn Töpel wrote:
>> From: Björn Töpel <bjorn.topel@intel.com>
>>
>> Currently the bpf_redirect_map() implementation dispatches to the
>> correct map-lookup function via a switch-statement. To avoid the
>> dispatching, this change adds one bpf_redirect_map() implementation per
>> map. Correct function is automatically selected by the BPF verifier.
>>
>> v2->v3 : Fix build when CONFIG_NET is not set. (lkp)
>> v1->v2 : Re-added comment. (Toke)
>> rfc->v1: Get rid of the macro and use __always_inline. (Jesper)
>>
>> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
>> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> 
> [...]
> 
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 3d34ba492d46..89ccc10c6348 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -5409,7 +5409,8 @@ record_func_map(struct bpf_verifier_env *env, 
>> struct bpf_call_arg_meta *meta,
>>           func_id != BPF_FUNC_map_delete_elem &&
>>           func_id != BPF_FUNC_map_push_elem &&
>>           func_id != BPF_FUNC_map_pop_elem &&
>> -        func_id != BPF_FUNC_map_peek_elem)
>> +        func_id != BPF_FUNC_map_peek_elem &&
>> +        func_id != BPF_FUNC_redirect_map)
>>           return 0;
>>       if (map == NULL) {
>> @@ -11545,12 +11546,12 @@ static int fixup_bpf_calls(struct 
>> bpf_verifier_env *env)
>>       struct bpf_prog *prog = env->prog;
>>       bool expect_blinding = bpf_jit_blinding_enabled(prog);
>>       struct bpf_insn *insn = prog->insnsi;
>> -    const struct bpf_func_proto *fn;
>>       const int insn_cnt = prog->len;
>>       const struct bpf_map_ops *ops;
>>       struct bpf_insn_aux_data *aux;
>>       struct bpf_insn insn_buf[16];
>>       struct bpf_prog *new_prog;
>> +    bpf_func_proto_func func;
>>       struct bpf_map *map_ptr;
>>       int i, ret, cnt, delta = 0;
>> @@ -11860,17 +11861,23 @@ static int fixup_bpf_calls(struct 
>> bpf_verifier_env *env)
>>           }
>>   patch_call_imm:
>> -        fn = env->ops->get_func_proto(insn->imm, env->prog);
>> +        if (insn->imm == BPF_FUNC_redirect_map) {
>> +            aux = &env->insn_aux_data[i];
>> +            map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
>> +            func = get_xdp_redirect_func(map_ptr->map_type);
> 
> Nope, this is broken. :/ The map_ptr could be poisoned, so 
> unconditionally fetching
> map_ptr->map_type can crash the box for specially crafted BPF progs.
>

Thanks for explaining, Daniel! I'll address that!

> Also, given you add the related BPF_CALL_3() functions below, what is 
> the reason
> to not properly integrate this like the map ops near patch_map_ops_generic?
>

...and will have look how the map-patching works!


Cheers,
Björn

[...]

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-21 20:09 [PATCH bpf-next v3 0/2] Optimize bpf_redirect_map()/xdp_do_redirect() Björn Töpel
2021-02-21 20:09 ` [PATCH bpf-next v3 1/2] bpf, xdp: per-map bpf_redirect_map functions for XDP Björn Töpel
2021-02-22  7:23   ` Jesper Dangaard Brouer
2021-02-24 23:38   ` Daniel Borkmann
2021-02-25  6:39     ` Björn Töpel
2021-02-21 20:09 ` [PATCH bpf-next v3 2/2] bpf, xdp: restructure redirect actions Björn Töpel
2021-02-22  8:10   ` Jesper Dangaard Brouer

BPF Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/bpf/0 bpf/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 bpf bpf/ https://lore.kernel.org/bpf \
		bpf@vger.kernel.org
	public-inbox-index bpf

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.bpf


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git