* [PATCH bpf-next v4 1/3] xsk: store struct xdp_sock as a flexible array member of the XSKMAP
2019-10-31 8:47 [PATCH bpf-next v4 0/3] xsk: XSKMAP performance improvements Björn Töpel
@ 2019-10-31 8:47 ` Björn Töpel
2019-10-31 8:47 ` [PATCH bpf-next v4 2/3] bpf: implement map_gen_lookup() callback for XSKMAP Björn Töpel
2019-10-31 8:47 ` [PATCH bpf-next v4 3/3] xsk: restructure/inline XSKMAP lookup/redirect/flush Björn Töpel
2 siblings, 0 replies; 6+ messages in thread
From: Björn Töpel @ 2019-10-31 8:47 UTC (permalink / raw)
To: netdev, ast, daniel, jakub.kicinski
Cc: Björn Töpel, bpf, magnus.karlsson, magnus.karlsson,
maciej.fijalkowski, jonathan.lemon, toke
From: Björn Töpel <bjorn.topel@intel.com>
Prior this commit, the array storing XDP socket instances were stored
in a separate allocated array of the XSKMAP. Now, we store the sockets
as a flexible array member in a similar fashion as the arraymap. Doing
so, we do less pointer chasing in the lookup.
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
kernel/bpf/xskmap.c | 55 +++++++++++++++++++--------------------------
1 file changed, 23 insertions(+), 32 deletions(-)
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 82a1ffe15dfa..edcbd863650e 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -11,9 +11,9 @@
struct xsk_map {
struct bpf_map map;
- struct xdp_sock **xsk_map;
struct list_head __percpu *flush_list;
spinlock_t lock; /* Synchronize map updates */
+ struct xdp_sock *xsk_map[];
};
int xsk_map_inc(struct xsk_map *map)
@@ -80,9 +80,10 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
+ struct bpf_map_memory mem;
+ int cpu, err, numa_node;
struct xsk_map *m;
- int cpu, err;
- u64 cost;
+ u64 cost, size;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -92,44 +93,35 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
return ERR_PTR(-EINVAL);
- m = kzalloc(sizeof(*m), GFP_USER);
- if (!m)
+ numa_node = bpf_map_attr_numa_node(attr);
+ size = struct_size(m, xsk_map, attr->max_entries);
+ cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
+
+ err = bpf_map_charge_init(&mem, cost);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ m = bpf_map_area_alloc(size, numa_node);
+ if (!m) {
+ bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
+ }
bpf_map_init_from_attr(&m->map, attr);
+ bpf_map_charge_move(&m->map.memory, &mem);
spin_lock_init(&m->lock);
- cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
- cost += sizeof(struct list_head) * num_possible_cpus();
-
- /* Notice returns -EPERM on if map size is larger than memlock limit */
- err = bpf_map_charge_init(&m->map.memory, cost);
- if (err)
- goto free_m;
-
- err = -ENOMEM;
-
m->flush_list = alloc_percpu(struct list_head);
- if (!m->flush_list)
- goto free_charge;
+ if (!m->flush_list) {
+ bpf_map_charge_finish(&m->map.memory);
+ bpf_map_area_free(m);
+ return ERR_PTR(-ENOMEM);
+ }
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
- m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
- sizeof(struct xdp_sock *),
- m->map.numa_node);
- if (!m->xsk_map)
- goto free_percpu;
return &m->map;
-
-free_percpu:
- free_percpu(m->flush_list);
-free_charge:
- bpf_map_charge_finish(&m->map.memory);
-free_m:
- kfree(m);
- return ERR_PTR(err);
}
static void xsk_map_free(struct bpf_map *map)
@@ -139,8 +131,7 @@ static void xsk_map_free(struct bpf_map *map)
bpf_clear_redirect_map(map);
synchronize_net();
free_percpu(m->flush_list);
- bpf_map_area_free(m->xsk_map);
- kfree(m);
+ bpf_map_area_free(m);
}
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
--
2.20.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH bpf-next v4 2/3] bpf: implement map_gen_lookup() callback for XSKMAP
2019-10-31 8:47 [PATCH bpf-next v4 0/3] xsk: XSKMAP performance improvements Björn Töpel
2019-10-31 8:47 ` [PATCH bpf-next v4 1/3] xsk: store struct xdp_sock as a flexible array member of the XSKMAP Björn Töpel
@ 2019-10-31 8:47 ` Björn Töpel
2019-10-31 23:48 ` Daniel Borkmann
2019-10-31 8:47 ` [PATCH bpf-next v4 3/3] xsk: restructure/inline XSKMAP lookup/redirect/flush Björn Töpel
2 siblings, 1 reply; 6+ messages in thread
From: Björn Töpel @ 2019-10-31 8:47 UTC (permalink / raw)
To: netdev, ast, daniel, jakub.kicinski
Cc: Maciej Fijalkowski, bpf, magnus.karlsson, magnus.karlsson,
jonathan.lemon, toke
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Inline the xsk_map_lookup_elem() via implementing the map_gen_lookup()
callback. This results in emitting the bpf instructions in place of
bpf_map_lookup_elem() helper call and better performance of bpf
programs.
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
kernel/bpf/xskmap.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index edcbd863650e..fa32f775b4de 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -163,6 +163,22 @@ struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
return xs;
}
+static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+ const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
+ struct bpf_insn *insn = insn_buf;
+
+ *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+ *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
+ *insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
+ *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *insn++ = BPF_MOV64_IMM(ret, 0);
+ return insn - insn_buf;
+}
+
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
struct xdp_sock *xs)
{
@@ -303,6 +319,7 @@ const struct bpf_map_ops xsk_map_ops = {
.map_free = xsk_map_free,
.map_get_next_key = xsk_map_get_next_key,
.map_lookup_elem = xsk_map_lookup_elem,
+ .map_gen_lookup = xsk_map_gen_lookup,
.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
--
2.20.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH bpf-next v4 3/3] xsk: restructure/inline XSKMAP lookup/redirect/flush
2019-10-31 8:47 [PATCH bpf-next v4 0/3] xsk: XSKMAP performance improvements Björn Töpel
2019-10-31 8:47 ` [PATCH bpf-next v4 1/3] xsk: store struct xdp_sock as a flexible array member of the XSKMAP Björn Töpel
2019-10-31 8:47 ` [PATCH bpf-next v4 2/3] bpf: implement map_gen_lookup() callback for XSKMAP Björn Töpel
@ 2019-10-31 8:47 ` Björn Töpel
2 siblings, 0 replies; 6+ messages in thread
From: Björn Töpel @ 2019-10-31 8:47 UTC (permalink / raw)
To: netdev, ast, daniel, jakub.kicinski
Cc: Björn Töpel, bpf, magnus.karlsson, magnus.karlsson,
maciej.fijalkowski, jonathan.lemon, toke
From: Björn Töpel <bjorn.topel@intel.com>
In this commit the XSKMAP entry lookup function used by the XDP
redirect code is moved from the xskmap.c file to the xdp_sock.h
header, so the lookup can be inlined from, e.g., the
bpf_xdp_redirect_map() function.
Further the __xsk_map_redirect() and __xsk_map_flush() is moved to the
xsk.c, which lets the compiler inline the xsk_rcv() and xsk_flush()
functions.
Finally, all the XDP socket functions were moved from linux/bpf.h to
net/xdp_sock.h, where most of the XDP sockets functions are anyway.
This yields a ~2% performance boost for the xdpsock "rx_drop"
scenario.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
include/linux/bpf.h | 25 ---------------------
include/net/xdp_sock.h | 51 ++++++++++++++++++++++++++++++++----------
kernel/bpf/xskmap.c | 48 ---------------------------------------
net/xdp/xsk.c | 33 +++++++++++++++++++++++++--
4 files changed, 70 insertions(+), 87 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 171be30fe0ae..ab8ee7543258 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1004,31 +1004,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
}
#endif
-#if defined(CONFIG_XDP_SOCKETS)
-struct xdp_sock;
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
-#else
-struct xdp_sock;
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
-
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void __xsk_map_flush(struct bpf_map *map)
-{
-}
-#endif
-
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index c9398ce7960f..e3780e4b74e1 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -69,7 +69,14 @@ struct xdp_umem {
/* Nodes are linked in the struct xdp_sock map_list field, and used to
* track which maps a certain socket reside in.
*/
-struct xsk_map;
+
+struct xsk_map {
+ struct bpf_map map;
+ struct list_head __percpu *flush_list;
+ spinlock_t lock; /* Synchronize map updates */
+ struct xdp_sock *xsk_map[];
+};
+
struct xsk_map_node {
struct list_head node;
struct xsk_map *map;
@@ -109,8 +116,6 @@ struct xdp_sock {
struct xdp_buff;
#ifdef CONFIG_XDP_SOCKETS
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-void xsk_flush(struct xdp_sock *xs);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
/* Used from netdev driver */
bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
@@ -134,6 +139,22 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
struct xdp_sock **map_entry);
int xsk_map_inc(struct xsk_map *map);
void xsk_map_put(struct xsk_map *map);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+ u32 key)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct xdp_sock *xs;
+
+ if (key >= map->max_entries)
+ return NULL;
+
+ xs = READ_ONCE(m->xsk_map[key]);
+ return xs;
+}
static inline u64 xsk_umem_extract_addr(u64 addr)
{
@@ -224,15 +245,6 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return -ENOTSUPP;
}
-static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
-{
- return -ENOTSUPP;
-}
-
-static inline void xsk_flush(struct xdp_sock *xs)
-{
-}
-
static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return false;
@@ -357,6 +369,21 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
return 0;
}
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+ u32 key)
+{
+ return NULL;
+}
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index fa32f775b4de..e3a9eb411586 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -9,13 +9,6 @@
#include <linux/slab.h>
#include <linux/sched.h>
-struct xsk_map {
- struct bpf_map map;
- struct list_head __percpu *flush_list;
- spinlock_t lock; /* Synchronize map updates */
- struct xdp_sock *xsk_map[];
-};
-
int xsk_map_inc(struct xsk_map *map)
{
struct bpf_map *m = &map->map;
@@ -151,18 +144,6 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
-{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *xs;
-
- if (key >= map->max_entries)
- return NULL;
-
- xs = READ_ONCE(m->xsk_map[key]);
- return xs;
-}
-
static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
@@ -179,35 +160,6 @@ static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
return insn - insn_buf;
}
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs)
-{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct list_head *flush_list = this_cpu_ptr(m->flush_list);
- int err;
-
- err = xsk_rcv(xs, xdp);
- if (err)
- return err;
-
- if (!xs->flush_node.prev)
- list_add(&xs->flush_node, flush_list);
-
- return 0;
-}
-
-void __xsk_map_flush(struct bpf_map *map)
-{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct list_head *flush_list = this_cpu_ptr(m->flush_list);
- struct xdp_sock *xs, *tmp;
-
- list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
- xsk_flush(xs);
- __list_del_clearprev(&xs->flush_node);
- }
-}
-
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
WARN_ON_ONCE(!rcu_read_lock_held());
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9044073fbf22..6040bc2b0088 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -196,7 +196,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
u32 len;
@@ -212,7 +212,7 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
}
-void xsk_flush(struct xdp_sock *xs)
+static void xsk_flush(struct xdp_sock *xs)
{
xskq_produce_flush_desc(xs->rx);
xs->sk.sk_data_ready(&xs->sk);
@@ -264,6 +264,35 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return err;
}
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+ int err;
+
+ err = xsk_rcv(xs, xdp);
+ if (err)
+ return err;
+
+ if (!xs->flush_node.prev)
+ list_add(&xs->flush_node, flush_list);
+
+ return 0;
+}
+
+void __xsk_map_flush(struct bpf_map *map)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+ struct xdp_sock *xs, *tmp;
+
+ list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
+ xsk_flush(xs);
+ __list_del_clearprev(&xs->flush_node);
+ }
+}
+
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
xskq_produce_flush_addr_n(umem->cq, nb_entries);
--
2.20.1
^ permalink raw reply related [flat|nested] 6+ messages in thread