BPF Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
@ 2020-02-08 15:41 Jiri Olsa
  2020-02-08 15:41 ` [PATCH 01/14] x86/mm: Rename is_kernel_text to __is_kernel_text Jiri Olsa
                   ` (15 more replies)
  0 siblings, 16 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

hi,
this patchset adds trampoline and dispatcher objects
to be visible in /proc/kallsyms. The last patch also
adds sorting for all bpf objects in /proc/kallsyms.

  $ sudo cat /proc/kallsyms | tail -20
  ...
  ffffffffa050f000 t bpf_prog_5a2b06eab81b8f51    [bpf]
  ffffffffa0511000 t bpf_prog_6deef7357e7b4530    [bpf]
  ffffffffa0542000 t bpf_trampoline_13832 [bpf]
  ffffffffa0548000 t bpf_prog_96f1b5bf4e4cc6dc_mutex_lock [bpf]
  ffffffffa0572000 t bpf_prog_d1c63e29ad82c4ab_bpf_prog1  [bpf]
  ffffffffa0585000 t bpf_prog_e314084d332a5338__dissect   [bpf]
  ffffffffa0587000 t bpf_prog_59785a79eac7e5d2_mutex_unlock       [bpf]
  ffffffffa0589000 t bpf_prog_d0db6e0cac050163_mutex_lock [bpf]
  ffffffffa058d000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
  ffffffffa05df000 t bpf_trampoline_25637 [bpf]
  ffffffffa05e3000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
  ffffffffa05e5000 t bpf_prog_3b185187f1855c4c    [bpf]
  ffffffffa05e7000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
  ffffffffa05eb000 t bpf_prog_93cebb259dd5c4b2_do_sys_open        [bpf]
  ffffffffa0677000 t bpf_dispatcher_xdp   [bpf]

thanks,
jirka


---
Björn Töpel (1):
      bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER

Jiri Olsa (13):
      x86/mm: Rename is_kernel_text to __is_kernel_text
      bpf: Add struct bpf_ksym
      bpf: Add name to struct bpf_ksym
      bpf: Add lnode list node to struct bpf_ksym
      bpf: Add bpf_kallsyms_tree tree
      bpf: Move bpf_tree add/del from bpf_prog_ksym_node_add/del
      bpf: Separate kallsyms add/del functions
      bpf: Add bpf_ksym_add/del functions
      bpf: Re-initialize lnode in bpf_ksym_del
      bpf: Rename bpf_tree to bpf_progs_tree
      bpf: Add trampolines to kallsyms
      bpf: Add dispatchers to kallsyms
      bpf: Sort bpf kallsyms symbols

 arch/x86/mm/init_32.c   |  14 ++++++----
 include/linux/bpf.h     |  54 ++++++++++++++++++++++++++------------
 include/linux/filter.h  |  13 +++-------
 kernel/bpf/core.c       | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
 kernel/bpf/dispatcher.c |   6 +++++
 kernel/bpf/trampoline.c |  23 ++++++++++++++++
 kernel/events/core.c    |   4 +--
 net/core/filter.c       |   5 ++--
 8 files changed, 219 insertions(+), 82 deletions(-)


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 01/14] x86/mm: Rename is_kernel_text to __is_kernel_text
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
@ 2020-02-08 15:41 ` Jiri Olsa
  2020-02-08 15:41 ` [PATCH 02/14] bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER Jiri Olsa
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: Dave Hansen, Andy Lutomirski, Peter Zijlstra, kbuild test robot,
	netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

The kbuild test robot reported a compile issue on x86 in one of
the following patches, which adds a <linux/kallsyms.h> include into
<linux/bpf.h> that is picked up by the init_32.c object.

The problem is that <linux/kallsyms.h> defines a global function
is_kernel_text, which collides with the static function of the
same name defined in init_32.c:

  $ make ARCH=i386
  ...
  >> arch/x86/mm/init_32.c:241:19: error: redefinition of 'is_kernel_text'
    static inline int is_kernel_text(unsigned long addr)
                      ^~~~~~~~~~~~~~
   In file included from include/linux/bpf.h:21:0,
                    from include/linux/bpf-cgroup.h:5,
                    from include/linux/cgroup-defs.h:22,
                    from include/linux/cgroup.h:28,
                    from include/linux/hugetlb.h:9,
                    from arch/x86/mm/init_32.c:18:
   include/linux/kallsyms.h:31:19: note: previous definition of 'is_kernel_text' was here
    static inline int is_kernel_text(unsigned long addr)

Renaming the init_32.c is_kernel_text function to __is_kernel_text.

Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/mm/init_32.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 23df4885bbed..eb6ede2c3d43 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -238,7 +238,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
 	}
 }
 
-static inline int is_kernel_text(unsigned long addr)
+/*
+ * The <linux/kallsyms.h> already defines is_kernel_text,
+ * using '__' prefix not to get in conflict.
+ */
+static inline int __is_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
 		return 1;
@@ -328,8 +332,8 @@ kernel_physical_mapping_init(unsigned long start,
 				addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
 					PAGE_OFFSET + PAGE_SIZE-1;
 
-				if (is_kernel_text(addr) ||
-				    is_kernel_text(addr2))
+				if (__is_kernel_text(addr) ||
+				    __is_kernel_text(addr2))
 					prot = PAGE_KERNEL_LARGE_EXEC;
 
 				pages_2m++;
@@ -354,7 +358,7 @@ kernel_physical_mapping_init(unsigned long start,
 				 */
 				pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
 
-				if (is_kernel_text(addr))
+				if (__is_kernel_text(addr))
 					prot = PAGE_KERNEL_EXEC;
 
 				pages_4k++;
@@ -881,7 +885,7 @@ static void mark_nxdata_nx(void)
 	 */
 	unsigned long start = PFN_ALIGN(_etext);
 	/*
-	 * This comes from is_kernel_text upper limit. Also HPAGE where used:
+	 * This comes from __is_kernel_text upper limit. Also HPAGE where used:
 	 */
 	unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
 
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 02/14] bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
  2020-02-08 15:41 ` [PATCH 01/14] x86/mm: Rename is_kernel_text to __is_kernel_text Jiri Olsa
@ 2020-02-08 15:41 ` Jiri Olsa
  2020-02-08 15:41 ` [PATCH 03/14] bpf: Add struct bpf_ksym Jiri Olsa
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

From: Björn Töpel <bjorn.topel@intel.com>

Adding bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER,
so all the dispatchers have the common name prefix.

And also a small '_' cleanup for bpf_dispatcher_nopfunc function
name.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h    | 21 +++++++++++----------
 include/linux/filter.h |  7 +++----
 net/core/filter.c      |  5 ++---
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8e9ad3943cd9..15c5f351f837 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -512,7 +512,7 @@ struct bpf_dispatcher {
 	u32 image_off;
 };
 
-static __always_inline unsigned int bpf_dispatcher_nopfunc(
+static __always_inline unsigned int bpf_dispatcher_nop_func(
 	const void *ctx,
 	const struct bpf_insn *insnsi,
 	unsigned int (*bpf_func)(const void *,
@@ -527,7 +527,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
 #define BPF_DISPATCHER_INIT(name) {			\
 	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
-	.func = &name##func,				\
+	.func = &name##_func,				\
 	.progs = {},					\
 	.num_progs = 0,					\
 	.image = NULL,					\
@@ -535,7 +535,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr);
 }
 
 #define DEFINE_BPF_DISPATCHER(name)					\
-	noinline unsigned int name##func(				\
+	noinline unsigned int bpf_dispatcher_##name##_func(		\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
 		unsigned int (*bpf_func)(const void *,			\
@@ -543,17 +543,18 @@ void bpf_trampoline_put(struct bpf_trampoline *tr);
 	{								\
 		return bpf_func(ctx, insnsi);				\
 	}								\
-	EXPORT_SYMBOL(name##func);			\
-	struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name);
+	EXPORT_SYMBOL(bpf_dispatcher_##name##_func);			\
+	struct bpf_dispatcher bpf_dispatcher_##name =			\
+		BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
 #define DECLARE_BPF_DISPATCHER(name)					\
-	unsigned int name##func(					\
+	unsigned int bpf_dispatcher_##name##_func(			\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
 		unsigned int (*bpf_func)(const void *,			\
 					 const struct bpf_insn *));	\
-	extern struct bpf_dispatcher name;
-#define BPF_DISPATCHER_FUNC(name) name##func
-#define BPF_DISPATCHER_PTR(name) (&name)
+	extern struct bpf_dispatcher bpf_dispatcher_##name;
+#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
+#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 				struct bpf_prog *to);
 struct bpf_image {
@@ -579,7 +580,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
 static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 #define DEFINE_BPF_DISPATCHER(name)
 #define DECLARE_BPF_DISPATCHER(name)
-#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc
+#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func
 #define BPF_DISPATCHER_PTR(name) NULL
 static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
 					      struct bpf_prog *from,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f349e2c0884c..eafe72644282 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -577,7 +577,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 	ret; })
 
 #define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx,		\
-					       bpf_dispatcher_nopfunc)
+					       bpf_dispatcher_nop_func)
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
@@ -701,7 +701,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 	return res;
 }
 
-DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+DECLARE_BPF_DISPATCHER(xdp)
 
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 					    struct xdp_buff *xdp)
@@ -712,8 +712,7 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 	 * already takes rcu_read_lock() when fetching the program, so
 	 * it's not necessary here anymore.
 	 */
-	return __BPF_PROG_RUN(prog, xdp,
-			      BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp));
+	return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
 }
 
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
diff --git a/net/core/filter.c b/net/core/filter.c
index 792e3744b915..5db435141e16 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8835,10 +8835,9 @@ const struct bpf_prog_ops sk_reuseport_prog_ops = {
 };
 #endif /* CONFIG_INET */
 
-DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+DEFINE_BPF_DISPATCHER(xdp)
 
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
 {
-	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
-				   prev_prog, prog);
+	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
 }
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 03/14] bpf: Add struct bpf_ksym
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
  2020-02-08 15:41 ` [PATCH 01/14] x86/mm: Rename is_kernel_text to __is_kernel_text Jiri Olsa
  2020-02-08 15:41 ` [PATCH 02/14] bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER Jiri Olsa
@ 2020-02-08 15:41 ` Jiri Olsa
  2020-02-08 15:41 ` [PATCH 04/14] bpf: Add name to " Jiri Olsa
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Adding 'struct bpf_ksym' object that will carry the
kallsym information for bpf symbol. Adding the start
and end address to begin with. It will be used by
bpf_prog, bpf_trampoline, bpf_dispatcher.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h |  6 ++++++
 kernel/bpf/core.c   | 26 +++++++++++---------------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 15c5f351f837..e39ded33fb0c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -462,6 +462,11 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 u64 notrace __bpf_prog_enter(void);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
 
+struct bpf_ksym {
+	unsigned long		 start;
+	unsigned long		 end;
+};
+
 enum bpf_tramp_prog_type {
 	BPF_TRAMP_FENTRY,
 	BPF_TRAMP_FEXIT,
@@ -643,6 +648,7 @@ struct bpf_prog_aux {
 	u32 size_poke_tab;
 	struct latch_tree_node ksym_tnode;
 	struct list_head ksym_lnode;
+	struct bpf_ksym ksym;
 	const struct bpf_prog_ops *ops;
 	struct bpf_map **used_maps;
 	struct bpf_prog *prog;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 973a20d49749..09b5939dcad3 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -524,17 +524,15 @@ int bpf_jit_harden   __read_mostly;
 long bpf_jit_limit   __read_mostly;
 
 static __always_inline void
-bpf_get_prog_addr_region(const struct bpf_prog *prog,
-			 unsigned long *symbol_start,
-			 unsigned long *symbol_end)
+bpf_get_prog_addr_region(const struct bpf_prog *prog)
 {
 	const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
 	unsigned long addr = (unsigned long)hdr;
 
 	WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
 
-	*symbol_start = addr;
-	*symbol_end   = addr + hdr->pages * PAGE_SIZE;
+	prog->aux->ksym.start = addr;
+	prog->aux->ksym.end   = addr + hdr->pages * PAGE_SIZE;
 }
 
 void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
@@ -575,13 +573,10 @@ void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 static __always_inline unsigned long
 bpf_get_prog_addr_start(struct latch_tree_node *n)
 {
-	unsigned long symbol_start, symbol_end;
 	const struct bpf_prog_aux *aux;
 
 	aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
-	bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
-
-	return symbol_start;
+	return aux->ksym.start;
 }
 
 static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
@@ -593,15 +588,13 @@ static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
 static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
 {
 	unsigned long val = (unsigned long)key;
-	unsigned long symbol_start, symbol_end;
 	const struct bpf_prog_aux *aux;
 
 	aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
-	bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
 
-	if (val < symbol_start)
+	if (val < aux->ksym.start)
 		return -1;
-	if (val >= symbol_end)
+	if (val >= aux->ksym.end)
 		return  1;
 
 	return 0;
@@ -649,6 +642,8 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 	    !capable(CAP_SYS_ADMIN))
 		return;
 
+	bpf_get_prog_addr_region(fp);
+
 	spin_lock_bh(&bpf_lock);
 	bpf_prog_ksym_node_add(fp->aux);
 	spin_unlock_bh(&bpf_lock);
@@ -677,14 +672,15 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
 const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 				 unsigned long *off, char *sym)
 {
-	unsigned long symbol_start, symbol_end;
 	struct bpf_prog *prog;
 	char *ret = NULL;
 
 	rcu_read_lock();
 	prog = bpf_prog_kallsyms_find(addr);
 	if (prog) {
-		bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
+		unsigned long symbol_start = prog->aux->ksym.start;
+		unsigned long symbol_end = prog->aux->ksym.end;
+
 		bpf_get_prog_name(prog, sym);
 
 		ret = sym;
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 04/14] bpf: Add name to struct bpf_ksym
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (2 preceding siblings ...)
  2020-02-08 15:41 ` [PATCH 03/14] bpf: Add struct bpf_ksym Jiri Olsa
@ 2020-02-08 15:41 ` " Jiri Olsa
  2020-02-08 15:42 ` [PATCH 05/14] bpf: Add lnode list node " Jiri Olsa
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Adding name to 'struct bpf_ksym' object to carry the name
of the symbol for bpf_prog, bpf_trampoline, bpf_dispatcher.

The current benefit is that name is now generated only when
the symbol is added to the list, so we don't need to generate
it every time it's accessed.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h    | 2 ++
 include/linux/filter.h | 6 ------
 kernel/bpf/core.c      | 8 +++++---
 kernel/events/core.c   | 4 ++--
 4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e39ded33fb0c..1327b07057a8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -18,6 +18,7 @@
 #include <linux/refcount.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -465,6 +466,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
 struct bpf_ksym {
 	unsigned long		 start;
 	unsigned long		 end;
+	char			 name[KSYM_NAME_LEN];
 };
 
 enum bpf_tramp_prog_type {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index eafe72644282..a945c250ad53 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1062,7 +1062,6 @@ bpf_address_lookup(unsigned long addr, unsigned long *size,
 
 void bpf_prog_kallsyms_add(struct bpf_prog *fp);
 void bpf_prog_kallsyms_del(struct bpf_prog *fp);
-void bpf_get_prog_name(const struct bpf_prog *prog, char *sym);
 
 #else /* CONFIG_BPF_JIT */
 
@@ -1131,11 +1130,6 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 {
 }
 
-static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
-{
-	sym[0] = '\0';
-}
-
 #endif /* CONFIG_BPF_JIT */
 
 void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 09b5939dcad3..f4f0b3ca95ae 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -535,8 +535,9 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog)
 	prog->aux->ksym.end   = addr + hdr->pages * PAGE_SIZE;
 }
 
-void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+static void bpf_get_prog_name(const struct bpf_prog *prog)
 {
+	char *sym = prog->aux->ksym.name;
 	const char *end = sym + KSYM_NAME_LEN;
 	const struct btf_type *type;
 	const char *func_name;
@@ -643,6 +644,7 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 		return;
 
 	bpf_get_prog_addr_region(fp);
+	bpf_get_prog_name(fp);
 
 	spin_lock_bh(&bpf_lock);
 	bpf_prog_ksym_node_add(fp->aux);
@@ -681,7 +683,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 		unsigned long symbol_start = prog->aux->ksym.start;
 		unsigned long symbol_end = prog->aux->ksym.end;
 
-		bpf_get_prog_name(prog, sym);
+		strncpy(sym, prog->aux->ksym.name, KSYM_NAME_LEN);
 
 		ret = sym;
 		if (size)
@@ -738,7 +740,7 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		if (it++ != symnum)
 			continue;
 
-		bpf_get_prog_name(aux->prog, sym);
+		strncpy(sym, aux->ksym.name, KSYM_NAME_LEN);
 
 		*value = (unsigned long)aux->prog->bpf_func;
 		*type  = BPF_SYM_ELF_TYPE;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2173c23c25b4..c4b01ca30cd4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8250,7 +8250,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
 	int i;
 
 	if (prog->aux->func_cnt == 0) {
-		bpf_get_prog_name(prog, sym);
+		strncpy(sym, prog->aux->ksym.name, KSYM_NAME_LEN);
 		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
 				   (u64)(unsigned long)prog->bpf_func,
 				   prog->jited_len, unregister, sym);
@@ -8258,7 +8258,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
 		for (i = 0; i < prog->aux->func_cnt; i++) {
 			struct bpf_prog *subprog = prog->aux->func[i];
 
-			bpf_get_prog_name(subprog, sym);
+			strncpy(sym, subprog->aux->ksym.name, KSYM_NAME_LEN);
 			perf_event_ksymbol(
 				PERF_RECORD_KSYMBOL_TYPE_BPF,
 				(u64)(unsigned long)subprog->bpf_func,
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 05/14] bpf: Add lnode list node to struct bpf_ksym
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (3 preceding siblings ...)
  2020-02-08 15:41 ` [PATCH 04/14] bpf: Add name to " Jiri Olsa
@ 2020-02-08 15:42 ` " Jiri Olsa
  2020-02-08 15:42 ` [PATCH 06/14] bpf: Add bpf_kallsyms_tree tree Jiri Olsa
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Adding lnode list node to 'struct bpf_ksym' object,
so the symbol itself can be chained and used in other
objects like bpf_trampoline and bpf_dispatcher.

Changing iterator to bpf_ksym in bpf_get_kallsym.

This patch also changes the address used for bpf_prog
displayed in /proc/kallsyms. Now it's the address of
the whole bpf_prog region, not the address of the entry
function. I think it makes more sense for /proc/kallsyms
to describe the entire area used by the bpf_prog. We can
easily change it in the future if needed.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h |  2 +-
 kernel/bpf/core.c   | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1327b07057a8..da67ca3afa2f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -467,6 +467,7 @@ struct bpf_ksym {
 	unsigned long		 start;
 	unsigned long		 end;
 	char			 name[KSYM_NAME_LEN];
+	struct list_head	 lnode;
 };
 
 enum bpf_tramp_prog_type {
@@ -649,7 +650,6 @@ struct bpf_prog_aux {
 	struct bpf_jit_poke_descriptor *poke_tab;
 	u32 size_poke_tab;
 	struct latch_tree_node ksym_tnode;
-	struct list_head ksym_lnode;
 	struct bpf_ksym ksym;
 	const struct bpf_prog_ops *ops;
 	struct bpf_map **used_maps;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f4f0b3ca95ae..b9b7077e60f3 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -97,7 +97,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 	fp->aux->prog = fp;
 	fp->jit_requested = ebpf_jit_enabled();
 
-	INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
+	INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
 
 	return fp;
 }
@@ -612,18 +612,18 @@ static struct latch_tree_root bpf_tree __cacheline_aligned;
 
 static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
 {
-	WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
-	list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
+	WARN_ON_ONCE(!list_empty(&aux->ksym.lnode));
+	list_add_tail_rcu(&aux->ksym.lnode, &bpf_kallsyms);
 	latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
 }
 
 static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
 {
-	if (list_empty(&aux->ksym_lnode))
+	if (list_empty(&aux->ksym.lnode))
 		return;
 
 	latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
-	list_del_rcu(&aux->ksym_lnode);
+	list_del_rcu(&aux->ksym.lnode);
 }
 
 static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
@@ -633,8 +633,8 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
 
 static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
 {
-	return list_empty(&fp->aux->ksym_lnode) ||
-	       fp->aux->ksym_lnode.prev == LIST_POISON2;
+	return list_empty(&fp->aux->ksym.lnode) ||
+	       fp->aux->ksym.lnode.prev == LIST_POISON2;
 }
 
 void bpf_prog_kallsyms_add(struct bpf_prog *fp)
@@ -728,7 +728,7 @@ const struct exception_table_entry *search_bpf_extables(unsigned long addr)
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		    char *sym)
 {
-	struct bpf_prog_aux *aux;
+	struct bpf_ksym *ksym;
 	unsigned int it = 0;
 	int ret = -ERANGE;
 
@@ -736,13 +736,13 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		return ret;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
+	list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
 		if (it++ != symnum)
 			continue;
 
-		strncpy(sym, aux->ksym.name, KSYM_NAME_LEN);
+		strncpy(sym, ksym->name, KSYM_NAME_LEN);
 
-		*value = (unsigned long)aux->prog->bpf_func;
+		*value = ksym->start;
 		*type  = BPF_SYM_ELF_TYPE;
 
 		ret = 0;
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 06/14] bpf: Add bpf_kallsyms_tree tree
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (4 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 05/14] bpf: Add lnode list node " Jiri Olsa
@ 2020-02-08 15:42 ` Jiri Olsa
  2020-02-11 18:21   ` Andrii Nakryiko
  2020-02-08 15:42 ` [PATCH 07/14] bpf: Move bpf_tree add/del from bpf_prog_ksym_node_add/del Jiri Olsa
                   ` (9 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

The bpf_tree is used both for kallsyms iterations and searching
for exception tables of bpf programs, which is needed only for
bpf programs.

Adding bpf_kallsyms_tree that will hold symbols for all bpf_prog,
bpf_trampoline and bpf_dispatcher objects and keeping bpf_tree
only for bpf_prog objects exception tables search to keep it fast.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h |  1 +
 kernel/bpf/core.c   | 60 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index da67ca3afa2f..151d7b1c8435 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -468,6 +468,7 @@ struct bpf_ksym {
 	unsigned long		 end;
 	char			 name[KSYM_NAME_LEN];
 	struct list_head	 lnode;
+	struct latch_tree_node	 tnode;
 };
 
 enum bpf_tramp_prog_type {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b9b7077e60f3..1daa72341450 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -606,8 +606,46 @@ static const struct latch_tree_ops bpf_tree_ops = {
 	.comp	= bpf_tree_comp,
 };
 
+static __always_inline unsigned long
+bpf_get_ksym_start(struct latch_tree_node *n)
+{
+	const struct bpf_ksym *ksym;
+
+	ksym = container_of(n, struct bpf_ksym, tnode);
+	return ksym->start;
+}
+
+static __always_inline bool
+bpf_ksym_tree_less(struct latch_tree_node *a,
+		   struct latch_tree_node *b)
+{
+	return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
+}
+
+static __always_inline int
+bpf_ksym_tree_comp(void *key, struct latch_tree_node *n)
+{
+	unsigned long val = (unsigned long)key;
+	const struct bpf_ksym *ksym;
+
+	ksym = container_of(n, struct bpf_ksym, tnode);
+
+	if (val < ksym->start)
+		return -1;
+	if (val >= ksym->end)
+		return  1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops bpf_kallsyms_tree_ops = {
+	.less	= bpf_ksym_tree_less,
+	.comp	= bpf_ksym_tree_comp,
+};
+
 static DEFINE_SPINLOCK(bpf_lock);
 static LIST_HEAD(bpf_kallsyms);
+static struct latch_tree_root bpf_kallsyms_tree __cacheline_aligned;
 static struct latch_tree_root bpf_tree __cacheline_aligned;
 
 static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
@@ -615,6 +653,7 @@ static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
 	WARN_ON_ONCE(!list_empty(&aux->ksym.lnode));
 	list_add_tail_rcu(&aux->ksym.lnode, &bpf_kallsyms);
 	latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+	latch_tree_insert(&aux->ksym.tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
 }
 
 static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
@@ -623,6 +662,7 @@ static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
 		return;
 
 	latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+	latch_tree_erase(&aux->ksym.tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
 	list_del_rcu(&aux->ksym.lnode);
 }
 
@@ -671,19 +711,27 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
 	       NULL;
 }
 
+static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
+{
+	struct latch_tree_node *n;
+
+	n = latch_tree_find((void *)addr, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
+	return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
+}
+
 const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 				 unsigned long *off, char *sym)
 {
-	struct bpf_prog *prog;
+	struct bpf_ksym *ksym;
 	char *ret = NULL;
 
 	rcu_read_lock();
-	prog = bpf_prog_kallsyms_find(addr);
-	if (prog) {
-		unsigned long symbol_start = prog->aux->ksym.start;
-		unsigned long symbol_end = prog->aux->ksym.end;
+	ksym = bpf_ksym_find(addr);
+	if (ksym) {
+		unsigned long symbol_start = ksym->start;
+		unsigned long symbol_end = ksym->end;
 
-		strncpy(sym, prog->aux->ksym.name, KSYM_NAME_LEN);
+		strncpy(sym, ksym->name, KSYM_NAME_LEN);
 
 		ret = sym;
 		if (size)
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 07/14] bpf: Move bpf_tree add/del from bpf_prog_ksym_node_add/del
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (5 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 06/14] bpf: Add bpf_kallsyms_tree tree Jiri Olsa
@ 2020-02-08 15:42 ` Jiri Olsa
  2020-02-08 15:42 ` [PATCH 08/14] bpf: Separate kallsyms add/del functions Jiri Olsa
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Moving bpf_tree add/del from bpf_prog_ksym_node_add/del,
because it will be used (and renamed) in following patches
for bpf_ksym objects. The bpf_tree is specific for bpf_prog
objects.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/bpf/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1daa72341450..f4c16b362858 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -652,7 +652,6 @@ static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
 {
 	WARN_ON_ONCE(!list_empty(&aux->ksym.lnode));
 	list_add_tail_rcu(&aux->ksym.lnode, &bpf_kallsyms);
-	latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
 	latch_tree_insert(&aux->ksym.tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
 }
 
@@ -661,7 +660,6 @@ static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
 	if (list_empty(&aux->ksym.lnode))
 		return;
 
-	latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
 	latch_tree_erase(&aux->ksym.tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
 	list_del_rcu(&aux->ksym.lnode);
 }
@@ -687,6 +685,7 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 	bpf_get_prog_name(fp);
 
 	spin_lock_bh(&bpf_lock);
+	latch_tree_insert(&fp->aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
 	bpf_prog_ksym_node_add(fp->aux);
 	spin_unlock_bh(&bpf_lock);
 }
@@ -697,6 +696,7 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 		return;
 
 	spin_lock_bh(&bpf_lock);
+	latch_tree_erase(&fp->aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
 	bpf_prog_ksym_node_del(fp->aux);
 	spin_unlock_bh(&bpf_lock);
 }
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 08/14] bpf: Separate kallsyms add/del functions
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (6 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 07/14] bpf: Move bpf_tree add/del from bpf_prog_ksym_node_add/del Jiri Olsa
@ 2020-02-08 15:42 ` Jiri Olsa
  2020-02-08 15:42 ` [PATCH 09/14] bpf: Add bpf_ksym_add/del functions Jiri Olsa
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Moving bpf_prog_ksym_node_add/del to __bpf_ksym_add/del
and changing the argument to 'struct bpf_ksym' object.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/bpf/core.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f4c16b362858..ee082c79ac99 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -648,20 +648,20 @@ static LIST_HEAD(bpf_kallsyms);
 static struct latch_tree_root bpf_kallsyms_tree __cacheline_aligned;
 static struct latch_tree_root bpf_tree __cacheline_aligned;
 
-static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
+static void __bpf_ksym_add(struct bpf_ksym *ksym)
 {
-	WARN_ON_ONCE(!list_empty(&aux->ksym.lnode));
-	list_add_tail_rcu(&aux->ksym.lnode, &bpf_kallsyms);
-	latch_tree_insert(&aux->ksym.tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
+	WARN_ON_ONCE(!list_empty(&ksym->lnode));
+	list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
+	latch_tree_insert(&ksym->tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
 }
 
-static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
+static void __bpf_ksym_del(struct bpf_ksym *ksym)
 {
-	if (list_empty(&aux->ksym.lnode))
+	if (list_empty(&ksym->lnode))
 		return;
 
-	latch_tree_erase(&aux->ksym.tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
-	list_del_rcu(&aux->ksym.lnode);
+	latch_tree_erase(&ksym->tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
+	list_del_rcu(&ksym->lnode);
 }
 
 static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
@@ -686,7 +686,7 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 
 	spin_lock_bh(&bpf_lock);
 	latch_tree_insert(&fp->aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
-	bpf_prog_ksym_node_add(fp->aux);
+	__bpf_ksym_add(&fp->aux->ksym);
 	spin_unlock_bh(&bpf_lock);
 }
 
@@ -697,7 +697,7 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 
 	spin_lock_bh(&bpf_lock);
 	latch_tree_erase(&fp->aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
-	bpf_prog_ksym_node_del(fp->aux);
+	__bpf_ksym_del(&fp->aux->ksym);
 	spin_unlock_bh(&bpf_lock);
 }
 
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 09/14] bpf: Add bpf_ksym_add/del functions
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (7 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 08/14] bpf: Separate kallsyms add/del functions Jiri Olsa
@ 2020-02-08 15:42 ` Jiri Olsa
  2020-02-08 15:42 ` [PATCH 10/14] bpf: Re-initialize lnode in bpf_ksym_del Jiri Olsa
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Adding bpf_ksym_add/del functions as locked version
for __bpf_ksym_add/del. It will be used in following
patches for bpf_trampoline and bpf_dispatcher.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h |  3 +++
 kernel/bpf/core.c   | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 151d7b1c8435..7a4626c8e747 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -573,6 +573,9 @@ struct bpf_image {
 #define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
 bool is_bpf_image_address(unsigned long address);
 void *bpf_image_alloc(void);
+/* Called only from code, so there's no need for stubs. */
+void bpf_ksym_add(struct bpf_ksym *ksym);
+void bpf_ksym_del(struct bpf_ksym *ksym);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ee082c79ac99..73242fd07893 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -655,6 +655,13 @@ static void __bpf_ksym_add(struct bpf_ksym *ksym)
 	latch_tree_insert(&ksym->tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
 }
 
+void bpf_ksym_add(struct bpf_ksym *ksym)
+{
+	spin_lock_bh(&bpf_lock);
+	__bpf_ksym_add(ksym);
+	spin_unlock_bh(&bpf_lock);
+}
+
 static void __bpf_ksym_del(struct bpf_ksym *ksym)
 {
 	if (list_empty(&ksym->lnode))
@@ -664,6 +671,13 @@ static void __bpf_ksym_del(struct bpf_ksym *ksym)
 	list_del_rcu(&ksym->lnode);
 }
 
+void bpf_ksym_del(struct bpf_ksym *ksym)
+{
+	spin_lock_bh(&bpf_lock);
+	__bpf_ksym_del(ksym);
+	spin_unlock_bh(&bpf_lock);
+}
+
 static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
 {
 	return fp->jited && !bpf_prog_was_classic(fp);
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 10/14] bpf: Re-initialize lnode in bpf_ksym_del
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (8 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 09/14] bpf: Add bpf_ksym_add/del functions Jiri Olsa
@ 2020-02-08 15:42 ` Jiri Olsa
  2020-02-11 18:28   ` Andrii Nakryiko
  2020-02-08 15:42 ` [PATCH 11/14] bpf: Rename bpf_tree to bpf_progs_tree Jiri Olsa
                   ` (5 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

When a bpf_prog is removed from kallsyms, it is about to be
freed anyway, so we don't care about its lnode state.

However, bpf_ksym_del will also be used by bpf_trampoline
and bpf_dispatcher objects, which stay allocated even when
they are not in the kallsyms list, hence the lnode re-init.

The list_del_rcu commentary states that we need to call
synchronize_rcu, before we can change/re-init the list_head
pointers.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/bpf/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 73242fd07893..66b17bea286e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -676,6 +676,13 @@ void bpf_ksym_del(struct bpf_ksym *ksym)
 	spin_lock_bh(&bpf_lock);
 	__bpf_ksym_del(ksym);
 	spin_unlock_bh(&bpf_lock);
+
+	/*
+	 * As explained in list_del_rcu, we must call synchronize_rcu
+	 * before changing list_head pointers.
+	 */
+	synchronize_rcu();
+	INIT_LIST_HEAD_RCU(&ksym->lnode);
 }
 
 static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 11/14] bpf: Rename bpf_tree to bpf_progs_tree
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (9 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 10/14] bpf: Re-initialize lnode in bpf_ksym_del Jiri Olsa
@ 2020-02-08 15:42 ` Jiri Olsa
  2020-02-08 15:42 ` [PATCH 12/14] bpf: Add trampolines to kallsyms Jiri Olsa
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Renaming bpf_tree to bpf_progs_tree and bpf_tree_ops
to bpf_progs_tree_ops to better capture the usage of
the tree, which is used for the bpf_prog objects only
for exception tables search.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/bpf/core.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 66b17bea286e..50af5dcf7ff9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -580,13 +580,14 @@ bpf_get_prog_addr_start(struct latch_tree_node *n)
 	return aux->ksym.start;
 }
 
-static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
-					  struct latch_tree_node *b)
+static __always_inline bool
+bpf_progs_tree_less(struct latch_tree_node *a,
+		    struct latch_tree_node *b)
 {
 	return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
 }
 
-static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
+static __always_inline int bpf_progs_tree_comp(void *key, struct latch_tree_node *n)
 {
 	unsigned long val = (unsigned long)key;
 	const struct bpf_prog_aux *aux;
@@ -601,9 +602,9 @@ static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
 	return 0;
 }
 
-static const struct latch_tree_ops bpf_tree_ops = {
-	.less	= bpf_tree_less,
-	.comp	= bpf_tree_comp,
+static const struct latch_tree_ops bpf_progs_tree_ops = {
+	.less	= bpf_progs_tree_less,
+	.comp	= bpf_progs_tree_comp,
 };
 
 static __always_inline unsigned long
@@ -646,7 +647,7 @@ static const struct latch_tree_ops bpf_kallsyms_tree_ops = {
 static DEFINE_SPINLOCK(bpf_lock);
 static LIST_HEAD(bpf_kallsyms);
 static struct latch_tree_root bpf_kallsyms_tree __cacheline_aligned;
-static struct latch_tree_root bpf_tree __cacheline_aligned;
+static struct latch_tree_root bpf_progs_tree __cacheline_aligned;
 
 static void __bpf_ksym_add(struct bpf_ksym *ksym)
 {
@@ -706,7 +707,8 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 	bpf_get_prog_name(fp);
 
 	spin_lock_bh(&bpf_lock);
-	latch_tree_insert(&fp->aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+	latch_tree_insert(&fp->aux->ksym_tnode, &bpf_progs_tree,
+			  &bpf_progs_tree_ops);
 	__bpf_ksym_add(&fp->aux->ksym);
 	spin_unlock_bh(&bpf_lock);
 }
@@ -717,7 +719,8 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 		return;
 
 	spin_lock_bh(&bpf_lock);
-	latch_tree_erase(&fp->aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+	latch_tree_erase(&fp->aux->ksym_tnode, &bpf_progs_tree,
+			 &bpf_progs_tree_ops);
 	__bpf_ksym_del(&fp->aux->ksym);
 	spin_unlock_bh(&bpf_lock);
 }
@@ -726,7 +729,8 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
 {
 	struct latch_tree_node *n;
 
-	n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
+	n = latch_tree_find((void *)addr, &bpf_progs_tree,
+			    &bpf_progs_tree_ops);
 	return n ?
 	       container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
 	       NULL;
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 12/14] bpf: Add trampolines to kallsyms
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (10 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 11/14] bpf: Rename bpf_tree to bpf_progs_tree Jiri Olsa
@ 2020-02-08 15:42 ` Jiri Olsa
  2020-02-11 18:51   ` Andrii Nakryiko
  2020-02-08 15:42 ` [PATCH 13/14] bpf: Add dispatchers " Jiri Olsa
                   ` (3 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Adding trampolines to kallsyms. It's displayed as
  bpf_trampoline_<ID> [bpf]

where ID is the BTF id of the trampoline function.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h     |  2 ++
 kernel/bpf/trampoline.c | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7a4626c8e747..b91bac10d3ea 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -502,6 +502,7 @@ struct bpf_trampoline {
 	/* Executable image of trampoline */
 	void *image;
 	u64 selector;
+	struct bpf_ksym ksym;
 };
 
 #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
@@ -573,6 +574,7 @@ struct bpf_image {
 #define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
 bool is_bpf_image_address(unsigned long address);
 void *bpf_image_alloc(void);
+void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
 /* Called only from code, so there's no need for stubs. */
 void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 6b264a92064b..1ee29907cbe5 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -96,6 +96,15 @@ bool is_bpf_image_address(unsigned long addr)
 	return ret;
 }
 
+void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
+{
+	struct bpf_image *image = container_of(data, struct bpf_image, data);
+
+	ksym->start = (unsigned long) image;
+	ksym->end = ksym->start + PAGE_SIZE;
+	bpf_ksym_add(ksym);
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
 	struct bpf_trampoline *tr;
@@ -131,6 +140,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 	for (i = 0; i < BPF_TRAMP_MAX; i++)
 		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
 	tr->image = image;
+	INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
 out:
 	mutex_unlock(&trampoline_mutex);
 	return tr;
@@ -267,6 +277,15 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
 	}
 }
 
+static void bpf_trampoline_kallsyms_add(struct bpf_trampoline *tr)
+{
+	struct bpf_ksym *ksym = &tr->ksym;
+
+	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu",
+		 tr->key & ((u64) (1LU << 32) - 1));
+	bpf_image_ksym_add(tr->image, &tr->ksym);
+}
+
 int bpf_trampoline_link_prog(struct bpf_prog *prog)
 {
 	enum bpf_tramp_prog_type kind;
@@ -311,6 +330,8 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog)
 	if (err) {
 		hlist_del(&prog->aux->tramp_hlist);
 		tr->progs_cnt[kind]--;
+	} else if (cnt == 0) {
+		bpf_trampoline_kallsyms_add(tr);
 	}
 out:
 	mutex_unlock(&tr->mutex);
@@ -336,6 +357,8 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
 	}
 	hlist_del(&prog->aux->tramp_hlist);
 	tr->progs_cnt[kind]--;
+	if (!(tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]))
+		bpf_ksym_del(&tr->ksym);
 	err = bpf_trampoline_update(prog->aux->trampoline);
 out:
 	mutex_unlock(&tr->mutex);
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 13/14] bpf: Add dispatchers to kallsyms
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (11 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 12/14] bpf: Add trampolines to kallsyms Jiri Olsa
@ 2020-02-08 15:42 ` " Jiri Olsa
  2020-02-11 19:03   ` Andrii Nakryiko
  2020-02-08 15:42 ` [PATCH 14/14] bpf: Sort bpf kallsyms symbols Jiri Olsa
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Adding dispatchers to kallsyms. It's displayed as
  bpf_dispatcher_<NAME>

where NAME is the name of dispatcher.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h     | 19 ++++++++++++-------
 kernel/bpf/dispatcher.c |  6 ++++++
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b91bac10d3ea..837cdc093d2c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -520,6 +520,7 @@ struct bpf_dispatcher {
 	int num_progs;
 	void *image;
 	u32 image_off;
+	struct bpf_ksym ksym;
 };
 
 static __always_inline unsigned int bpf_dispatcher_nop_func(
@@ -535,13 +536,17 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-#define BPF_DISPATCHER_INIT(name) {			\
-	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
-	.func = &name##_func,				\
-	.progs = {},					\
-	.num_progs = 0,					\
-	.image = NULL,					\
-	.image_off = 0					\
+#define BPF_DISPATCHER_INIT(_name) {				\
+	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
+	.func = &_name##_func,					\
+	.progs = {},						\
+	.num_progs = 0,						\
+	.image = NULL,						\
+	.image_off = 0,						\
+	.ksym = {						\
+		.name = #_name,					\
+		.lnode = LIST_HEAD_INIT(_name.ksym.lnode),	\
+	},							\
 }
 
 #define DEFINE_BPF_DISPATCHER(name)					\
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index b3e5b214fed8..8771d2cc5840 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -152,6 +152,12 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 	if (!changed)
 		goto out;
 
+	if (!prev_num_progs)
+		bpf_image_ksym_add(d->image, &d->ksym);
+
+	if (!d->num_progs)
+		bpf_ksym_del(&d->ksym);
+
 	bpf_dispatcher_update(d, prev_num_progs);
 out:
 	mutex_unlock(&d->mutex);
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 14/14] bpf: Sort bpf kallsyms symbols
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (12 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 13/14] bpf: Add dispatchers " Jiri Olsa
@ 2020-02-08 15:42 ` Jiri Olsa
  2020-02-11 19:12   ` Andrii Nakryiko
  2020-02-10 15:51 ` [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Björn Töpel
  2020-02-11 19:13 ` Arnaldo Carvalho de Melo
  15 siblings, 1 reply; 36+ messages in thread
From: Jiri Olsa @ 2020-02-08 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, bpf, Andrii Nakryiko, Yonghong Song, Song Liu,
	Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Currently we don't sort bpf_kallsyms and display symbols
in /proc/kallsyms as they come in via __bpf_ksym_add.

Using the latch tree to get the next bpf_ksym object
and insert the new symbol ahead of it.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/bpf/core.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 50af5dcf7ff9..c63ff34b2128 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -651,9 +651,30 @@ static struct latch_tree_root bpf_progs_tree __cacheline_aligned;
 
 static void __bpf_ksym_add(struct bpf_ksym *ksym)
 {
+	struct list_head *head = &bpf_kallsyms;
+
 	WARN_ON_ONCE(!list_empty(&ksym->lnode));
-	list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
 	latch_tree_insert(&ksym->tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
+
+	/*
+	 * Add ksym into bpf_kallsyms in ordered position,
+	 * which is prepared for us by latch tree addition.
+	 *
+	 * Find out the next symbol and insert ksym right
+	 * ahead of it. If ksym is the last one, just tail
+	 * add to the bpf_kallsyms.
+	 */
+	if (!list_empty(&bpf_kallsyms)) {
+		struct rb_node *next = rb_next(&ksym->tnode.node[0]);
+
+		if (next) {
+			struct bpf_ksym *ptr;
+
+			ptr = container_of(next, struct bpf_ksym, tnode.node[0]);
+			head = &ptr->lnode;
+		}
+	}
+	list_add_tail_rcu(&ksym->lnode, head);
 }
 
 void bpf_ksym_add(struct bpf_ksym *ksym)
-- 
2.24.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (13 preceding siblings ...)
  2020-02-08 15:42 ` [PATCH 14/14] bpf: Sort bpf kallsyms symbols Jiri Olsa
@ 2020-02-10 15:51 ` Björn Töpel
  2020-02-10 16:17   ` Jiri Olsa
  2020-02-13 16:23   ` Jiri Olsa
  2020-02-11 19:13 ` Arnaldo Carvalho de Melo
  15 siblings, 2 replies; 36+ messages in thread
From: Björn Töpel @ 2020-02-10 15:51 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Daniel Borkmann, Netdev, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Sat, 8 Feb 2020 at 16:42, Jiri Olsa <jolsa@kernel.org> wrote:
>
> hi,
> this patchset adds trampoline and dispatcher objects
> to be visible in /proc/kallsyms. The last patch also
> adds sorting for all bpf objects in /proc/kallsyms.
>

Thanks for working on this!

I'm probably missing something with my perf setup; I've applied your
patches, and everything seem to work fine from an kallsyms
perspective:

# grep bpf_dispatcher_xdp /proc/kallsyms
...
ffffffffc0511000 t bpf_dispatcher_xdp   [bpf]

However, when I run
# perf top

I still see the undecorated one:
0.90%  [unknown]                  [k] 0xffffffffc0511037

Any ideas?
Björn

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-10 15:51 ` [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Björn Töpel
@ 2020-02-10 16:17   ` Jiri Olsa
  2020-02-11 19:32     ` Arnaldo Carvalho de Melo
  2020-02-13 16:23   ` Jiri Olsa
  1 sibling, 1 reply; 36+ messages in thread
From: Jiri Olsa @ 2020-02-10 16:17 UTC (permalink / raw)
  To: Björn Töpel
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Netdev, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Mon, Feb 10, 2020 at 04:51:08PM +0100, Björn Töpel wrote:
> On Sat, 8 Feb 2020 at 16:42, Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > hi,
> > this patchset adds trampoline and dispatcher objects
> > to be visible in /proc/kallsyms. The last patch also
> > adds sorting for all bpf objects in /proc/kallsyms.
> >
> 
> Thanks for working on this!
> 
> I'm probably missing something with my perf setup; I've applied your
> patches, and everything seem to work fine from an kallsyms
> perspective:
> 
> # grep bpf_dispatcher_xdp /proc/kallsyms
> ...
> ffffffffc0511000 t bpf_dispatcher_xdp   [bpf]
> 
> However, when I run
> # perf top
> 
> I still see the undecorated one:
> 0.90%  [unknown]                  [k] 0xffffffffc0511037
> 
> Any ideas?

yea strange.. it should be picked up from /proc/kallsyms as
fallback if there's no other source, I'll check on that
(might be the problem with perf depending on address going
only higher in /proc/kallsyms, while bpf symbols are at the
end and start over from the lowest bpf address)

anyway, in perf we enumerate bpf_progs via the perf events
PERF_BPF_EVENT_PROG_LOAD,PERF_BPF_EVENT_PROG_UNLOAD interface
together with PERF_RECORD_KSYMBOL_TYPE_BPF events

we might need to add something like:
  PERF_RECORD_KSYMBOL_TYPE_BPF_TRAMPOLINE
  PERF_RECORD_KSYMBOL_TYPE_BPF_DISPATCHER

to notify about the area, I'll check on that

however the /proc/kallsyms fallback should work in any
case.. thanks for report ;-)

jirka


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 06/14] bpf: Add bpf_kallsyms_tree tree
  2020-02-08 15:42 ` [PATCH 06/14] bpf: Add bpf_kallsyms_tree tree Jiri Olsa
@ 2020-02-11 18:21   ` Andrii Nakryiko
  2020-02-12 10:49     ` Jiri Olsa
  0 siblings, 1 reply; 36+ messages in thread
From: Andrii Nakryiko @ 2020-02-11 18:21 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> The bpf_tree is used both for kallsyms iterations and searching
> for exception tables of bpf programs, which is needed only for
> bpf programs.
>
> Adding bpf_kallsyms_tree that will hold symbols for all bpf_prog,
> bpf_trampoline and bpf_dispatcher objects and keeping bpf_tree
> only for bpf_prog objects exception tables search to keep it fast.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  include/linux/bpf.h |  1 +
>  kernel/bpf/core.c   | 60 ++++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 55 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index da67ca3afa2f..151d7b1c8435 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -468,6 +468,7 @@ struct bpf_ksym {
>         unsigned long            end;
>         char                     name[KSYM_NAME_LEN];
>         struct list_head         lnode;
> +       struct latch_tree_node   tnode;
>  };
>
>  enum bpf_tramp_prog_type {
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index b9b7077e60f3..1daa72341450 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -606,8 +606,46 @@ static const struct latch_tree_ops bpf_tree_ops = {
>         .comp   = bpf_tree_comp,
>  };
>
> +static __always_inline unsigned long
> +bpf_get_ksym_start(struct latch_tree_node *n)

I thought static functions are never marked as inline in kernel
sources. Are there some special cases when its ok/necessary?

> +{
> +       const struct bpf_ksym *ksym;
> +
> +       ksym = container_of(n, struct bpf_ksym, tnode);
> +       return ksym->start;
> +}
> +
> +static __always_inline bool
> +bpf_ksym_tree_less(struct latch_tree_node *a,
> +                  struct latch_tree_node *b)
> +{
> +       return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
> +}
> +
> +static __always_inline int
> +bpf_ksym_tree_comp(void *key, struct latch_tree_node *n)
> +{
> +       unsigned long val = (unsigned long)key;
> +       const struct bpf_ksym *ksym;
> +
> +       ksym = container_of(n, struct bpf_ksym, tnode);
> +
> +       if (val < ksym->start)
> +               return -1;
> +       if (val >= ksym->end)
> +               return  1;
> +
> +       return 0;
> +}
> +
> +static const struct latch_tree_ops bpf_kallsyms_tree_ops = {

Given all the helper functions use bpf_ksym_tree and bpf_ksym
(bpf_ksym_find) prefixes, call this bpf_ksym_tree_ops?

> +       .less   = bpf_ksym_tree_less,
> +       .comp   = bpf_ksym_tree_comp,
> +};
> +
>  static DEFINE_SPINLOCK(bpf_lock);
>  static LIST_HEAD(bpf_kallsyms);
> +static struct latch_tree_root bpf_kallsyms_tree __cacheline_aligned;

same as above, bpf_ksym_tree for consistency?

>  static struct latch_tree_root bpf_tree __cacheline_aligned;
>
>  static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
> @@ -615,6 +653,7 @@ static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
>         WARN_ON_ONCE(!list_empty(&aux->ksym.lnode));
>         list_add_tail_rcu(&aux->ksym.lnode, &bpf_kallsyms);
>         latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
> +       latch_tree_insert(&aux->ksym.tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
>  }
>

[...]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/14] bpf: Re-initialize lnode in bpf_ksym_del
  2020-02-08 15:42 ` [PATCH 10/14] bpf: Re-initialize lnode in bpf_ksym_del Jiri Olsa
@ 2020-02-11 18:28   ` Andrii Nakryiko
  2020-02-12 10:43     ` Jiri Olsa
  0 siblings, 1 reply; 36+ messages in thread
From: Andrii Nakryiko @ 2020-02-11 18:28 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> When bpf_prog is removed from kallsyms it's on the way
> out to be removed, so we don't care about lnode state.
>
> However the bpf_ksym_del will be used also by bpf_trampoline
> and bpf_dispatcher objects, which stay allocated even when
> they are not in kallsyms list, hence the lnode re-init.
>
> The list_del_rcu commentary states that we need to call
> synchronize_rcu, before we can change/re-init the list_head
> pointers.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---

Wouldn't it make more sense to have patches 7 though 10 as a one
patch? It's a generalization of ksym from being bpf_prog-specific to
be more general (which this initialization fix is part of, arguably).

>  kernel/bpf/core.c | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 73242fd07893..66b17bea286e 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -676,6 +676,13 @@ void bpf_ksym_del(struct bpf_ksym *ksym)
>         spin_lock_bh(&bpf_lock);
>         __bpf_ksym_del(ksym);
>         spin_unlock_bh(&bpf_lock);
> +
> +       /*
> +        * As explained in list_del_rcu, we must call synchronize_rcu
> +        * before changing list_head pointers.
> +        */
> +       synchronize_rcu();
> +       INIT_LIST_HEAD_RCU(&ksym->lnode);
>  }
>
>  static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/14] bpf: Add trampolines to kallsyms
  2020-02-08 15:42 ` [PATCH 12/14] bpf: Add trampolines to kallsyms Jiri Olsa
@ 2020-02-11 18:51   ` Andrii Nakryiko
  2020-02-12 11:10     ` Jiri Olsa
  0 siblings, 1 reply; 36+ messages in thread
From: Andrii Nakryiko @ 2020-02-11 18:51 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Adding trampolines to kallsyms. It's displayed as
>   bpf_trampoline_<ID> [bpf]
>
> where ID is the BTF id of the trampoline function.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  include/linux/bpf.h     |  2 ++
>  kernel/bpf/trampoline.c | 23 +++++++++++++++++++++++
>  2 files changed, 25 insertions(+)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 7a4626c8e747..b91bac10d3ea 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -502,6 +502,7 @@ struct bpf_trampoline {
>         /* Executable image of trampoline */
>         void *image;
>         u64 selector;
> +       struct bpf_ksym ksym;
>  };
>
>  #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
> @@ -573,6 +574,7 @@ struct bpf_image {
>  #define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
>  bool is_bpf_image_address(unsigned long address);
>  void *bpf_image_alloc(void);
> +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
>  /* Called only from code, so there's no need for stubs. */
>  void bpf_ksym_add(struct bpf_ksym *ksym);
>  void bpf_ksym_del(struct bpf_ksym *ksym);
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 6b264a92064b..1ee29907cbe5 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -96,6 +96,15 @@ bool is_bpf_image_address(unsigned long addr)
>         return ret;
>  }
>
> +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
> +{
> +       struct bpf_image *image = container_of(data, struct bpf_image, data);
> +
> +       ksym->start = (unsigned long) image;
> +       ksym->end = ksym->start + PAGE_SIZE;

this seems wrong, use BPF_IMAGE_SIZE instead of PAGE_SIZE?

> +       bpf_ksym_add(ksym);
> +}
> +
>  struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
>  {
>         struct bpf_trampoline *tr;
> @@ -131,6 +140,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
>         for (i = 0; i < BPF_TRAMP_MAX; i++)
>                 INIT_HLIST_HEAD(&tr->progs_hlist[i]);
>         tr->image = image;
> +       INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
>  out:
>         mutex_unlock(&trampoline_mutex);
>         return tr;
> @@ -267,6 +277,15 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
>         }
>  }
>
> +static void bpf_trampoline_kallsyms_add(struct bpf_trampoline *tr)
> +{
> +       struct bpf_ksym *ksym = &tr->ksym;
> +
> +       snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu",
> +                tr->key & ((u64) (1LU << 32) - 1));

why the 32-bit truncation? also, wouldn't it be more trivial as (u32)tr->key?

> +       bpf_image_ksym_add(tr->image, &tr->ksym);
> +}
> +
>  int bpf_trampoline_link_prog(struct bpf_prog *prog)
>  {
>         enum bpf_tramp_prog_type kind;
> @@ -311,6 +330,8 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog)
>         if (err) {
>                 hlist_del(&prog->aux->tramp_hlist);
>                 tr->progs_cnt[kind]--;
> +       } else if (cnt == 0) {
> +               bpf_trampoline_kallsyms_add(tr);

You didn't handle BPF_TRAMP_REPLACE case above.

Also this if (err) { ... } else if (cnt == 0) { } pattern is a bit
convoluted. How about:

if (err) {
   ... whatever ...
   goto out;
}
if (cnt == 0) { ... }

>         }
>  out:
>         mutex_unlock(&tr->mutex);
> @@ -336,6 +357,8 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
>         }
>         hlist_del(&prog->aux->tramp_hlist);
>         tr->progs_cnt[kind]--;
> +       if (!(tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]))
> +               bpf_ksym_del(&tr->ksym);

same, BPF_TRAMP_REPLACE case. I'd also introduce cnt for consistency
with bpf_trampoline_link_prog?

>         err = bpf_trampoline_update(prog->aux->trampoline);
>  out:
>         mutex_unlock(&tr->mutex);
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 13/14] bpf: Add dispatchers to kallsyms
  2020-02-08 15:42 ` [PATCH 13/14] bpf: Add dispatchers " Jiri Olsa
@ 2020-02-11 19:03   ` Andrii Nakryiko
  2020-02-12 10:52     ` Jiri Olsa
  0 siblings, 1 reply; 36+ messages in thread
From: Andrii Nakryiko @ 2020-02-11 19:03 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Adding dispatchers to kallsyms. It's displayed as
>   bpf_dispatcher_<NAME>
>
> where NAME is the name of dispatcher.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  include/linux/bpf.h     | 19 ++++++++++++-------
>  kernel/bpf/dispatcher.c |  6 ++++++
>  2 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index b91bac10d3ea..837cdc093d2c 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -520,6 +520,7 @@ struct bpf_dispatcher {
>         int num_progs;
>         void *image;
>         u32 image_off;
> +       struct bpf_ksym ksym;
>  };
>
>  static __always_inline unsigned int bpf_dispatcher_nop_func(
> @@ -535,13 +536,17 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
>  int bpf_trampoline_link_prog(struct bpf_prog *prog);
>  int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
>  void bpf_trampoline_put(struct bpf_trampoline *tr);
> -#define BPF_DISPATCHER_INIT(name) {                    \
> -       .mutex = __MUTEX_INITIALIZER(name.mutex),       \
> -       .func = &name##_func,                           \
> -       .progs = {},                                    \
> -       .num_progs = 0,                                 \
> -       .image = NULL,                                  \
> -       .image_off = 0                                  \
> +#define BPF_DISPATCHER_INIT(_name) {                           \
> +       .mutex = __MUTEX_INITIALIZER(_name.mutex),              \
> +       .func = &_name##_func,                                  \
> +       .progs = {},                                            \
> +       .num_progs = 0,                                         \
> +       .image = NULL,                                          \
> +       .image_off = 0,                                         \
> +       .ksym = {                                               \
> +               .name = #_name,                                 \
> +               .lnode = LIST_HEAD_INIT(_name.ksym.lnode),      \
> +       },                                                      \
>  }
>
>  #define DEFINE_BPF_DISPATCHER(name)                                    \
> diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
> index b3e5b214fed8..8771d2cc5840 100644
> --- a/kernel/bpf/dispatcher.c
> +++ b/kernel/bpf/dispatcher.c
> @@ -152,6 +152,12 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
>         if (!changed)
>                 goto out;
>
> +       if (!prev_num_progs)
> +               bpf_image_ksym_add(d->image, &d->ksym);
> +
> +       if (!d->num_progs)
> +               bpf_ksym_del(&d->ksym);
> +
>         bpf_dispatcher_update(d, prev_num_progs);

On slightly unrelated note: seems like bpf_dispatcher_update won't
propagate any lower-level errors back, which seems pretty bad as a
bunch of stuff can go wrong.

Björn, was it a conscious decision or this just slipped through the cracks?

Jiri, reason I started looking at this was twofold:
1. you add/remove the symbol before the dispatcher is updated, which is
a different order from BPF trampoline updates. I think updating symbols
after a successful update makes more sense, no?
2. I was wondering if bpf_dispatcher_update() could return 0/1 (and <0
on error, of course), depending on whether dispatcher is present or
not. Though I'm not hard set on this.

>  out:
>         mutex_unlock(&d->mutex);
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 14/14] bpf: Sort bpf kallsyms symbols
  2020-02-08 15:42 ` [PATCH 14/14] bpf: Sort bpf kallsyms symbols Jiri Olsa
@ 2020-02-11 19:12   ` Andrii Nakryiko
  0 siblings, 0 replies; 36+ messages in thread
From: Andrii Nakryiko @ 2020-02-11 19:12 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Currently we don't sort bpf_kallsyms and display symbols
> in proc/kallsyms as they come in via __bpf_ksym_add.
>
> Using the latch tree to get the next bpf_ksym object
> and insert the new symbol ahead of it.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  kernel/bpf/core.c | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 50af5dcf7ff9..c63ff34b2128 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -651,9 +651,30 @@ static struct latch_tree_root bpf_progs_tree __cacheline_aligned;
>
>  static void __bpf_ksym_add(struct bpf_ksym *ksym)
>  {
> +       struct list_head *head = &bpf_kallsyms;
> +
>         WARN_ON_ONCE(!list_empty(&ksym->lnode));
> -       list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
>         latch_tree_insert(&ksym->tnode, &bpf_kallsyms_tree, &bpf_kallsyms_tree_ops);
> +
> +       /*
> +        * Add ksym into bpf_kallsyms in ordered position,
> +        * which is prepared for us by latch tree addition.
> +        *
> +        * Find out the next symbol and insert ksym right
> +        * ahead of it. If ksym is the last one, just tail
> +        * add to the bpf_kallsyms.
> +        */
> +       if (!list_empty(&bpf_kallsyms)) {

nit: this is a bit redundant check (and unlikely to be false)

> +               struct rb_node *next = rb_next(&ksym->tnode.node[0]);
> +
> +               if (next) {
> +                       struct bpf_ksym *ptr;
> +
> +                       ptr = container_of(next, struct bpf_ksym, tnode.node[0]);
> +                       head = &ptr->lnode;
> +               }
> +       }
> +       list_add_tail_rcu(&ksym->lnode, head);
>  }
>
>  void bpf_ksym_add(struct bpf_ksym *ksym)
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
                   ` (14 preceding siblings ...)
  2020-02-10 15:51 ` [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Björn Töpel
@ 2020-02-11 19:13 ` Arnaldo Carvalho de Melo
  2020-02-12 10:46   ` Jiri Olsa
  15 siblings, 1 reply; 36+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-02-11 19:13 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Daniel Borkmann, netdev, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

Em Sat, Feb 08, 2020 at 04:41:55PM +0100, Jiri Olsa escreveu:
> hi,
> this patchset adds trampoline and dispatcher objects
> to be visible in /proc/kallsyms. The last patch also
> adds sorting for all bpf objects in /proc/kallsyms.

This will allow those to appear in profiles, right? That would be
interesting to explicitly state, i.e. the _why_ of this patch, not just
the _what_.

Thanks,

- Arnaldo
 
>   $ sudo cat /proc/kallsyms | tail -20
>   ...
>   ffffffffa050f000 t bpf_prog_5a2b06eab81b8f51    [bpf]
>   ffffffffa0511000 t bpf_prog_6deef7357e7b4530    [bpf]
>   ffffffffa0542000 t bpf_trampoline_13832 [bpf]
>   ffffffffa0548000 t bpf_prog_96f1b5bf4e4cc6dc_mutex_lock [bpf]
>   ffffffffa0572000 t bpf_prog_d1c63e29ad82c4ab_bpf_prog1  [bpf]
>   ffffffffa0585000 t bpf_prog_e314084d332a5338__dissect   [bpf]
>   ffffffffa0587000 t bpf_prog_59785a79eac7e5d2_mutex_unlock       [bpf]
>   ffffffffa0589000 t bpf_prog_d0db6e0cac050163_mutex_lock [bpf]
>   ffffffffa058d000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
>   ffffffffa05df000 t bpf_trampoline_25637 [bpf]
>   ffffffffa05e3000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
>   ffffffffa05e5000 t bpf_prog_3b185187f1855c4c    [bpf]
>   ffffffffa05e7000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
>   ffffffffa05eb000 t bpf_prog_93cebb259dd5c4b2_do_sys_open        [bpf]
>   ffffffffa0677000 t bpf_dispatcher_xdp   [bpf]
> 
> thanks,
> jirka
> 
> 
> ---
> Björn Töpel (1):
>       bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER
> 
> Jiri Olsa (13):
>       x86/mm: Rename is_kernel_text to __is_kernel_text
>       bpf: Add struct bpf_ksym
>       bpf: Add name to struct bpf_ksym
>       bpf: Add lnode list node to struct bpf_ksym
>       bpf: Add bpf_kallsyms_tree tree
>       bpf: Move bpf_tree add/del from bpf_prog_ksym_node_add/del
>       bpf: Separate kallsyms add/del functions
>       bpf: Add bpf_ksym_add/del functions
>       bpf: Re-initialize lnode in bpf_ksym_del
>       bpf: Rename bpf_tree to bpf_progs_tree
>       bpf: Add trampolines to kallsyms
>       bpf: Add dispatchers to kallsyms
>       bpf: Sort bpf kallsyms symbols
> 
>  arch/x86/mm/init_32.c   |  14 ++++++----
>  include/linux/bpf.h     |  54 ++++++++++++++++++++++++++------------
>  include/linux/filter.h  |  13 +++-------
>  kernel/bpf/core.c       | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
>  kernel/bpf/dispatcher.c |   6 +++++
>  kernel/bpf/trampoline.c |  23 ++++++++++++++++
>  kernel/events/core.c    |   4 +--
>  net/core/filter.c       |   5 ++--
>  8 files changed, 219 insertions(+), 82 deletions(-)
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-10 16:17   ` Jiri Olsa
@ 2020-02-11 19:32     ` Arnaldo Carvalho de Melo
  2020-02-12 11:13       ` Jiri Olsa
  0 siblings, 1 reply; 36+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-02-11 19:32 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Björn Töpel, Jiri Olsa, Alexei Starovoitov,
	Daniel Borkmann, Netdev, bpf, Andrii Nakryiko, Yonghong Song,
	Song Liu, Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Em Mon, Feb 10, 2020 at 05:17:51PM +0100, Jiri Olsa escreveu:
> On Mon, Feb 10, 2020 at 04:51:08PM +0100, Björn Töpel wrote:
> > On Sat, 8 Feb 2020 at 16:42, Jiri Olsa <jolsa@kernel.org> wrote:
> > > this patchset adds trampoline and dispatcher objects
> > > to be visible in /proc/kallsyms. The last patch also
> > > adds sorting for all bpf objects in /proc/kallsyms.

> > Thanks for working on this!

> > I'm probably missing something with my perf setup; I've applied your
> > patches, and everything seem to work fine from an kallsyms
> > perspective:

> > # grep bpf_dispatcher_xdp /proc/kallsyms
> > ...
> > ffffffffc0511000 t bpf_dispatcher_xdp   [bpf]
> > 
> > However, when I run
> > # perf top
> > 
> > I still see the undecorated one:
> > 0.90%  [unknown]                  [k] 0xffffffffc0511037
> > 
> > Any ideas?
 
> yea strange.. it should be picked up from /proc/kallsyms as
> fallback if there's no other source, I'll check on that
> (might be the problem with perf depending on address going
> only higher in /proc/kallsyms, while bpf symbols are at the
> end and start over from the lowest bpf address)
> 
> anyway, in perf we enumerate bpf_progs via the perf events
> PERF_BPF_EVENT_PROG_LOAD,PERF_BPF_EVENT_PROG_UNLOAD interface
> together with PERF_RECORD_KSYMBOL_TYPE_BPF events
> 
> we might need to add something like:
>   PERF_RECORD_KSYMBOL_TYPE_BPF_TRAMPOLINE
>   PERF_RECORD_KSYMBOL_TYPE_BPF_DISPATCHER
> 
> to notify about the area, I'll check on that
> 
> however the /proc/kallsyms fallback should work in any
> case.. thanks for report ;-)

We should by now move kallsyms to be the preferred source of symbols,
not vmlinux, right?

Perhaps what is happening is:

[root@quaco ~]# strace -f -e open,openat -o /tmp/bla perf top
[root@quaco ~]# grep vmlinux /tmp/bla
11013 openat(AT_FDCWD, "vmlinux", O_RDONLY) = -1 ENOENT (No such file or directory)
11013 openat(AT_FDCWD, "/boot/vmlinux", O_RDONLY) = -1 ENOENT (No such file or directory)
11013 openat(AT_FDCWD, "/boot/vmlinux-5.5.0+", O_RDONLY) = -1 ENOENT (No such file or directory)
11013 openat(AT_FDCWD, "/usr/lib/debug/boot/vmlinux-5.5.0+", O_RDONLY) = -1 ENOENT (No such file or directory)
11013 openat(AT_FDCWD, "/lib/modules/5.5.0+/build/vmlinux", O_RDONLY) = 152
[root@quaco ~]#

I.e. it is using vmlinux for resolving symbols and he should try with:

[root@quaco ~]# strace -f -e open,openat -o /tmp/bla perf top --ignore-vmlinux
[root@quaco ~]# perf top -h vmlinux

 Usage: perf top [<options>]

    -k, --vmlinux <file>  vmlinux pathname
        --ignore-vmlinux  don't load vmlinux even if found

[root@quaco ~]# grep vmlinux /tmp/bla
[root@quaco ~]#

Historically vmlinux was preferred because it contains function sizes,
but with all these out of the blue symbols, we need to prefer starting
with /proc/kallsyms and, as we do now, continue getting updates via
PERF_RECORD_KSYMBOL.

Humm, but then trampolines don't generate that, right? Or do they? If they
don't, then we will know about just the trampolines in place when the
record/top session starts; reparsing /proc/kallsyms periodically seems
excessive?

- Arnaldo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/14] bpf: Re-initialize lnode in bpf_ksym_del
  2020-02-11 18:28   ` Andrii Nakryiko
@ 2020-02-12 10:43     ` Jiri Olsa
  0 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 10:43 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Tue, Feb 11, 2020 at 10:28:50AM -0800, Andrii Nakryiko wrote:
> On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > When bpf_prog is removed from kallsyms it's on the way
> > out to be removed, so we don't care about lnode state.
> >
> > However the bpf_ksym_del will be used also by bpf_trampoline
> > and bpf_dispatcher objects, which stay allocated even when
> > they are not in kallsyms list, hence the lnode re-init.
> >
> > The list_del_rcu commentary states that we need to call
> > synchronize_rcu, before we can change/re-init the list_head
> > pointers.
> >
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> 
> Wouldn't it make more sense to have patches 7 though 10 as a one
> patch? It's a generalization of ksym from being bpf_prog-specific to
> be more general (which this initialization fix is part of, arguably).

it was my initial change ;-) but then I realized I have to explain
several things in the changelog, and that's usually the sign that
you need to split the patch.. also I think now it's easier for review
and backporting

so I prefer it split like this, but if you guys want to squash it
together, I'll do it ;-)

jirka

> 
> >  kernel/bpf/core.c | 7 +++++++
> >  1 file changed, 7 insertions(+)
> >
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index 73242fd07893..66b17bea286e 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -676,6 +676,13 @@ void bpf_ksym_del(struct bpf_ksym *ksym)
> >         spin_lock_bh(&bpf_lock);
> >         __bpf_ksym_del(ksym);
> >         spin_unlock_bh(&bpf_lock);
> > +
> > +       /*
> > +        * As explained in list_del_rcu, We must call synchronize_rcu
> > +        * before changing list_head pointers.
> > +        */
> > +       synchronize_rcu();
> > +       INIT_LIST_HEAD_RCU(&ksym->lnode);
> >  }
> >
> >  static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
> > --
> > 2.24.1
> >
> 


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-11 19:13 ` Arnaldo Carvalho de Melo
@ 2020-02-12 10:46   ` Jiri Olsa
  0 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 10:46 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, netdev, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Tue, Feb 11, 2020 at 04:13:47PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Sat, Feb 08, 2020 at 04:41:55PM +0100, Jiri Olsa escreveu:
> > hi,
> > this patchset adds trampoline and dispatcher objects
> > to be visible in /proc/kallsyms. The last patch also
> > adds sorting for all bpf objects in /proc/kallsyms.
> 
> This will allow those to appear in profiles, right? That would be

yea, one would think so.. but as you saw in the other email
there are still some issues ;-)

> interesting to explicitly state, i.e. the _why_ of this patch, not just
> the _what_.

I guess another reason would be accountability of the kernel space,
so that everything with the symbol would appear in /proc/kallsyms

jirka

> 
> Thanks,
> 
> - Arnaldo
>  
> >   $ sudo cat /proc/kallsyms | tail -20
> >   ...
> >   ffffffffa050f000 t bpf_prog_5a2b06eab81b8f51    [bpf]
> >   ffffffffa0511000 t bpf_prog_6deef7357e7b4530    [bpf]
> >   ffffffffa0542000 t bpf_trampoline_13832 [bpf]
> >   ffffffffa0548000 t bpf_prog_96f1b5bf4e4cc6dc_mutex_lock [bpf]
> >   ffffffffa0572000 t bpf_prog_d1c63e29ad82c4ab_bpf_prog1  [bpf]
> >   ffffffffa0585000 t bpf_prog_e314084d332a5338__dissect   [bpf]
> >   ffffffffa0587000 t bpf_prog_59785a79eac7e5d2_mutex_unlock       [bpf]
> >   ffffffffa0589000 t bpf_prog_d0db6e0cac050163_mutex_lock [bpf]
> >   ffffffffa058d000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
> >   ffffffffa05df000 t bpf_trampoline_25637 [bpf]
> >   ffffffffa05e3000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
> >   ffffffffa05e5000 t bpf_prog_3b185187f1855c4c    [bpf]
> >   ffffffffa05e7000 t bpf_prog_d8f047721e4d8321_bpf_prog2  [bpf]
> >   ffffffffa05eb000 t bpf_prog_93cebb259dd5c4b2_do_sys_open        [bpf]
> >   ffffffffa0677000 t bpf_dispatcher_xdp   [bpf]
> > 
> > thanks,
> > jirka
> > 
> > 
> > ---
> > Björn Töpel (1):
> >       bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER
> > 
> > Jiri Olsa (13):
> >       x86/mm: Rename is_kernel_text to __is_kernel_text
> >       bpf: Add struct bpf_ksym
> >       bpf: Add name to struct bpf_ksym
> >       bpf: Add lnode list node to struct bpf_ksym
> >       bpf: Add bpf_kallsyms_tree tree
> >       bpf: Move bpf_tree add/del from bpf_prog_ksym_node_add/del
> >       bpf: Separate kallsyms add/del functions
> >       bpf: Add bpf_ksym_add/del functions
> >       bpf: Re-initialize lnode in bpf_ksym_del
> >       bpf: Rename bpf_tree to bpf_progs_tree
> >       bpf: Add trampolines to kallsyms
> >       bpf: Add dispatchers to kallsyms
> >       bpf: Sort bpf kallsyms symbols
> > 
> >  arch/x86/mm/init_32.c   |  14 ++++++----
> >  include/linux/bpf.h     |  54 ++++++++++++++++++++++++++------------
> >  include/linux/filter.h  |  13 +++-------
> >  kernel/bpf/core.c       | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
> >  kernel/bpf/dispatcher.c |   6 +++++
> >  kernel/bpf/trampoline.c |  23 ++++++++++++++++
> >  kernel/events/core.c    |   4 +--
> >  net/core/filter.c       |   5 ++--
> >  8 files changed, 219 insertions(+), 82 deletions(-)
> > 
> 
> -- 
> 
> - Arnaldo
> 


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 06/14] bpf: Add bpf_kallsyms_tree tree
  2020-02-11 18:21   ` Andrii Nakryiko
@ 2020-02-12 10:49     ` Jiri Olsa
  0 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 10:49 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Tue, Feb 11, 2020 at 10:21:10AM -0800, Andrii Nakryiko wrote:
> On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > The bpf_tree is used both for kallsyms iterations and searching
> > for exception tables of bpf programs, which is needed only for
> > bpf programs.
> >
> > Adding bpf_kallsyms_tree that will hold symbols for all bpf_prog,
> > bpf_trampoline and bpf_dispatcher objects and keeping bpf_tree
> > only for bpf_prog objects exception tables search to keep it fast.
> >
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  include/linux/bpf.h |  1 +
> >  kernel/bpf/core.c   | 60 ++++++++++++++++++++++++++++++++++++++++-----
> >  2 files changed, 55 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index da67ca3afa2f..151d7b1c8435 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -468,6 +468,7 @@ struct bpf_ksym {
> >         unsigned long            end;
> >         char                     name[KSYM_NAME_LEN];
> >         struct list_head         lnode;
> > +       struct latch_tree_node   tnode;
> >  };
> >
> >  enum bpf_tramp_prog_type {
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index b9b7077e60f3..1daa72341450 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -606,8 +606,46 @@ static const struct latch_tree_ops bpf_tree_ops = {
> >         .comp   = bpf_tree_comp,
> >  };
> >
> > +static __always_inline unsigned long
> > +bpf_get_ksym_start(struct latch_tree_node *n)
> 
> I thought static functions are never marked as inline in kernel
> sources. Are there some special cases when its ok/necessary?

I followed the other latch tree ops functions and did not think
much about that.. will check

> 
> > +{
> > +       const struct bpf_ksym *ksym;
> > +
> > +       ksym = container_of(n, struct bpf_ksym, tnode);
> > +       return ksym->start;
> > +}
> > +
> > +static __always_inline bool
> > +bpf_ksym_tree_less(struct latch_tree_node *a,
> > +                  struct latch_tree_node *b)
> > +{
> > +       return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
> > +}
> > +
> > +static __always_inline int
> > +bpf_ksym_tree_comp(void *key, struct latch_tree_node *n)
> > +{
> > +       unsigned long val = (unsigned long)key;
> > +       const struct bpf_ksym *ksym;
> > +
> > +       ksym = container_of(n, struct bpf_ksym, tnode);
> > +
> > +       if (val < ksym->start)
> > +               return -1;
> > +       if (val >= ksym->end)
> > +               return  1;
> > +
> > +       return 0;
> > +}
> > +
> > +static const struct latch_tree_ops bpf_kallsyms_tree_ops = {
> 
> Given all the helper functions use bpf_ksym_tree and bpf_ksym
> (bpf_ksym_find) prefixes, call this bpf_ksym_tree_ops?

right, should be bpf_ksym_tree_ops as you said

> 
> > +       .less   = bpf_ksym_tree_less,
> > +       .comp   = bpf_ksym_tree_comp,
> > +};
> > +
> >  static DEFINE_SPINLOCK(bpf_lock);
> >  static LIST_HEAD(bpf_kallsyms);
> > +static struct latch_tree_root bpf_kallsyms_tree __cacheline_aligned;
> 
> same as above, bpf_ksym_tree for consistency?

right, thanks

jirka


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 13/14] bpf: Add dispatchers to kallsyms
  2020-02-11 19:03   ` Andrii Nakryiko
@ 2020-02-12 10:52     ` Jiri Olsa
  0 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 10:52 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Tue, Feb 11, 2020 at 11:03:23AM -0800, Andrii Nakryiko wrote:
> On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > Adding dispatchers to kallsyms. It's displayed as
> >   bpf_dispatcher_<NAME>
> >
> > where NAME is the name of dispatcher.
> >
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  include/linux/bpf.h     | 19 ++++++++++++-------
> >  kernel/bpf/dispatcher.c |  6 ++++++
> >  2 files changed, 18 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index b91bac10d3ea..837cdc093d2c 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -520,6 +520,7 @@ struct bpf_dispatcher {
> >         int num_progs;
> >         void *image;
> >         u32 image_off;
> > +       struct bpf_ksym ksym;
> >  };
> >
> >  static __always_inline unsigned int bpf_dispatcher_nop_func(
> > @@ -535,13 +536,17 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
> >  int bpf_trampoline_link_prog(struct bpf_prog *prog);
> >  int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
> >  void bpf_trampoline_put(struct bpf_trampoline *tr);
> > -#define BPF_DISPATCHER_INIT(name) {                    \
> > -       .mutex = __MUTEX_INITIALIZER(name.mutex),       \
> > -       .func = &name##_func,                           \
> > -       .progs = {},                                    \
> > -       .num_progs = 0,                                 \
> > -       .image = NULL,                                  \
> > -       .image_off = 0                                  \
> > +#define BPF_DISPATCHER_INIT(_name) {                           \
> > +       .mutex = __MUTEX_INITIALIZER(_name.mutex),              \
> > +       .func = &_name##_func,                                  \
> > +       .progs = {},                                            \
> > +       .num_progs = 0,                                         \
> > +       .image = NULL,                                          \
> > +       .image_off = 0,                                         \
> > +       .ksym = {                                               \
> > +               .name = #_name,                                 \
> > +               .lnode = LIST_HEAD_INIT(_name.ksym.lnode),      \
> > +       },                                                      \
> >  }
> >
> >  #define DEFINE_BPF_DISPATCHER(name)                                    \
> > diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
> > index b3e5b214fed8..8771d2cc5840 100644
> > --- a/kernel/bpf/dispatcher.c
> > +++ b/kernel/bpf/dispatcher.c
> > @@ -152,6 +152,12 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
> >         if (!changed)
> >                 goto out;
> >
> > +       if (!prev_num_progs)
> > +               bpf_image_ksym_add(d->image, &d->ksym);
> > +
> > +       if (!d->num_progs)
> > +               bpf_ksym_del(&d->ksym);
> > +
> >         bpf_dispatcher_update(d, prev_num_progs);
> 
> On slightly unrelated note: seems like bpf_dispatcher_update won't
> propagate any lower-level errors back, which seems pretty bad as a
> bunch of stuff can go wrong.
> 
> Björn, was it a conscious decision or this just slipped through the cracks?
> 
> Jiri, reason I started looking at this was twofold:
> 1. you add/remove symbol before dispatcher is updated, which is
> different order from BPF trampoline updates. I think updating symbols
> after successful update makes more sense, no?

right, I guess I did not care, because there's no error returned
from bpf_dispatcher_update as you pointed out.. I'll check if we
can add that and add/del symbols afterwards

> 2. I was wondering if bpf_dispatcher_update() could return 0/1 (and <0
> on error, of course), depending on whether dispatcher is present or
> not. Though I'm not hard set on this.

yes, that might be a way.. I'll check

thanks,
jirka


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/14] bpf: Add trampolines to kallsyms
  2020-02-11 18:51   ` Andrii Nakryiko
@ 2020-02-12 11:10     ` Jiri Olsa
  2020-02-12 16:33       ` Andrii Nakryiko
  0 siblings, 1 reply; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 11:10 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Tue, Feb 11, 2020 at 10:51:27AM -0800, Andrii Nakryiko wrote:
> On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > Adding trampolines to kallsyms. It's displayed as
> >   bpf_trampoline_<ID> [bpf]
> >
> > where ID is the BTF id of the trampoline function.
> >
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  include/linux/bpf.h     |  2 ++
> >  kernel/bpf/trampoline.c | 23 +++++++++++++++++++++++
> >  2 files changed, 25 insertions(+)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 7a4626c8e747..b91bac10d3ea 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -502,6 +502,7 @@ struct bpf_trampoline {
> >         /* Executable image of trampoline */
> >         void *image;
> >         u64 selector;
> > +       struct bpf_ksym ksym;
> >  };
> >
> >  #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
> > @@ -573,6 +574,7 @@ struct bpf_image {
> >  #define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
> >  bool is_bpf_image_address(unsigned long address);
> >  void *bpf_image_alloc(void);
> > +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
> >  /* Called only from code, so there's no need for stubs. */
> >  void bpf_ksym_add(struct bpf_ksym *ksym);
> >  void bpf_ksym_del(struct bpf_ksym *ksym);
> > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > index 6b264a92064b..1ee29907cbe5 100644
> > --- a/kernel/bpf/trampoline.c
> > +++ b/kernel/bpf/trampoline.c
> > @@ -96,6 +96,15 @@ bool is_bpf_image_address(unsigned long addr)
> >         return ret;
> >  }
> >
> > +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
> > +{
> > +       struct bpf_image *image = container_of(data, struct bpf_image, data);
> > +
> > +       ksym->start = (unsigned long) image;
> > +       ksym->end = ksym->start + PAGE_SIZE;
> 
> this seems wrong, use BPF_IMAGE_SIZE instead of PAGE_SIZE?

BPF_IMAGE_SIZE is the size of the data portion of the image,
which is PAGE_SIZE - sizeof(struct bpf_image)

here we want to account the whole size = data + tree node (struct bpf_image)

> 
> > +       bpf_ksym_add(ksym);
> > +}
> > +
> >  struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> >  {
> >         struct bpf_trampoline *tr;
> > @@ -131,6 +140,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> >         for (i = 0; i < BPF_TRAMP_MAX; i++)
> >                 INIT_HLIST_HEAD(&tr->progs_hlist[i]);
> >         tr->image = image;
> > +       INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
> >  out:
> >         mutex_unlock(&trampoline_mutex);
> >         return tr;
> > @@ -267,6 +277,15 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
> >         }
> >  }
> >
> > +static void bpf_trampoline_kallsyms_add(struct bpf_trampoline *tr)
> > +{
> > +       struct bpf_ksym *ksym = &tr->ksym;
> > +
> > +       snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu",
> > +                tr->key & ((u64) (1LU << 32) - 1));
> 
> why the 32-bit truncation? also, wouldn't it be more trivial as (u32)tr->key?

tr->key can have the target prog id in upper 32 bits,
I'll try to use the casting as you suggest

> 
> > +       bpf_image_ksym_add(tr->image, &tr->ksym);
> > +}
> > +
> >  int bpf_trampoline_link_prog(struct bpf_prog *prog)
> >  {
> >         enum bpf_tramp_prog_type kind;
> > @@ -311,6 +330,8 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog)
> >         if (err) {
> >                 hlist_del(&prog->aux->tramp_hlist);
> >                 tr->progs_cnt[kind]--;
> > +       } else if (cnt == 0) {
> > +               bpf_trampoline_kallsyms_add(tr);
> 
> You didn't handle BPF_TRAMP_REPLACE case above.

ugh, right.. will add

> 
> Also this if (err) { ... } else if (cnt == 0) { } pattern is a bit
> convoluted. How about:
> 
> if (err) {
>    ... whatever ...
>    goto out;
> }
> if (cnt == 0) { ... }

yep, that's better

> 
> >         }
> >  out:
> >         mutex_unlock(&tr->mutex);
> > @@ -336,6 +357,8 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
> >         }
> >         hlist_del(&prog->aux->tramp_hlist);
> >         tr->progs_cnt[kind]--;
> > +       if (!(tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]))
> > +               bpf_ksym_del(&tr->ksym);
> 
> same, BPF_TRAMP_REPLACE case. I'd also introduce cnt for consistency
> with bpf_trampoline_link_prog?

ok, thanks a lot for comments

jirka


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-11 19:32     ` Arnaldo Carvalho de Melo
@ 2020-02-12 11:13       ` Jiri Olsa
  2020-02-12 13:31         ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 11:13 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Björn Töpel, Jiri Olsa, Alexei Starovoitov,
	Daniel Borkmann, Netdev, bpf, Andrii Nakryiko, Yonghong Song,
	Song Liu, Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

On Tue, Feb 11, 2020 at 04:32:23PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Mon, Feb 10, 2020 at 05:17:51PM +0100, Jiri Olsa escreveu:
> > On Mon, Feb 10, 2020 at 04:51:08PM +0100, Björn Töpel wrote:
> > > On Sat, 8 Feb 2020 at 16:42, Jiri Olsa <jolsa@kernel.org> wrote:
> > > > this patchset adds trampoline and dispatcher objects
> > > > to be visible in /proc/kallsyms. The last patch also
> > > > adds sorting for all bpf objects in /proc/kallsyms.
> 
> > > Thanks for working on this!
> 
> > > I'm probably missing something with my perf setup; I've applied your
> > > patches, and everything seem to work fine from an kallsyms
> > > perspective:
> 
> > > # grep bpf_dispatcher_xdp /proc/kallsyms
> > > ...
> > > ffffffffc0511000 t bpf_dispatcher_xdp   [bpf]
> > > 
> > > However, when I run
> > > # perf top
> > > 
> > > I still see the undecorated one:
> > > 0.90%  [unknown]                  [k] 0xffffffffc0511037
> > > 
> > > Any ideas?
>  
> > yea strange.. it should be picked up from /proc/kallsyms as
> > fallback if there's no other source, I'll check on that
> > (might be the problem with perf depending on address going
> > only higher in /proc/kallsyms, while bpf symbols are at the
> > end and start over from the lowest bpf address)
> > 
> > anyway, in perf we enumerate bpf_progs via the perf events
> > PERF_BPF_EVENT_PROG_LOAD,PERF_BPF_EVENT_PROG_UNLOAD interface
> > together with PERF_RECORD_KSYMBOL_TYPE_BPF events
> > 
> > we might need to add something like:
> >   PERF_RECORD_KSYMBOL_TYPE_BPF_TRAMPOLINE
> >   PERF_RECORD_KSYMBOL_TYPE_BPF_DISPATCHER
> > 
> > to notify about the area, I'll check on that
> > 
> > however the /proc/kallsyms fallback should work in any
> > case.. thanks for report ;-)
> 
> We should by now move kallsyms to be the preferred source of symbols,
> not vmlinux, right?
> 
> Perhaps what is happening is:
> 
> [root@quaco ~]# strace -f -e open,openat -o /tmp/bla perf top
> [root@quaco ~]# grep vmlinux /tmp/bla
> 11013 openat(AT_FDCWD, "vmlinux", O_RDONLY) = -1 ENOENT (No such file or directory)
> 11013 openat(AT_FDCWD, "/boot/vmlinux", O_RDONLY) = -1 ENOENT (No such file or directory)
> 11013 openat(AT_FDCWD, "/boot/vmlinux-5.5.0+", O_RDONLY) = -1 ENOENT (No such file or directory)
> 11013 openat(AT_FDCWD, "/usr/lib/debug/boot/vmlinux-5.5.0+", O_RDONLY) = -1 ENOENT (No such file or directory)
> 11013 openat(AT_FDCWD, "/lib/modules/5.5.0+/build/vmlinux", O_RDONLY) = 152
> [root@quaco ~]#
> 
> I.e. it is using vmlinux for resolving symbols and he should try with:
> 
> [root@quaco ~]# strace -f -e open,openat -o /tmp/bla perf top --ignore-vmlinux
> [root@quaco ~]# perf top -h vmlinux
> 
>  Usage: perf top [<options>]
> 
>     -k, --vmlinux <file>  vmlinux pathname
>         --ignore-vmlinux  don't load vmlinux even if found
> 
> [root@quaco ~]# grep vmlinux /tmp/bla
> [root@quaco ~]#
> 
> Historically vmlinux was preferred because it contains function sizes,
> but with all these out of the blue symbols, we need to prefer starting
> with /proc/kallsyms and, as we do now, continue getting updates via
> PERF_RECORD_KSYMBOL.
> 
> Humm, but then trampolines don't generate that, right? Or does it? If it
> doesn't, then we will know about just the trampolines in place when the
> record/top session starts, reparsing /proc/kallsyms periodically seems
> excessive?

I plan to extend the KSYMBOL interface to contain trampolines/dispatcher
data, plus we could do some intelligent fallback to /proc/kallsyms in case
vmlinux won't have anything

jirka


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-12 11:13       ` Jiri Olsa
@ 2020-02-12 13:31         ` Arnaldo Carvalho de Melo
  2020-02-12 22:40           ` Jiri Olsa
  0 siblings, 1 reply; 36+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-02-12 13:31 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Björn Töpel, Jiri Olsa, Alexei Starovoitov,
	Daniel Borkmann, Netdev, bpf, Andrii Nakryiko, Yonghong Song,
	Song Liu, Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

Em Wed, Feb 12, 2020 at 12:13:46PM +0100, Jiri Olsa escreveu:
> On Tue, Feb 11, 2020 at 04:32:23PM -0300, Arnaldo Carvalho de Melo wrote:
> > Historically vmlinux was preferred because it contains function sizes,
> > but with all these out of the blue symbols, we need to prefer starting
> > with /proc/kallsyms and, as we do now, continue getting updates via
> > PERF_RECORD_KSYMBOL.

> > Humm, but then trampolines don't generate that, right? Or does it? If it
> > doesn't, then we will know about just the trampolines in place when the
> > record/top session starts, reparsing /proc/kallsyms periodically seems
> > excessive?

> I plan to extend the KSYMBOL interface to contain trampolines/dispatcher
> data,

That seems like the sensible, without looking too much at all the
details, to do, yes.

> plus we could do some intelligent fallback to /proc/kallsyms in case
> vmlinux won't have anything

At this point what would be the good reason to prefer vmlinux instead of
going straight to using /proc/kallsyms?

We have support for taking a snapshot of it at 'perf top' start, i.e.
right at the point we need to resolve a kernel symbol, then we get
PERF_RECORD_KSYMBOL for things that gets in place after that.

And as well we save it to the build-id cache so that later, at 'perf
report/script' time we can resolve kernel symbols, etc.

vmlinux is just what is in there right before boot, after that, for
quite some time, _lots_ of stuff happens :-)

- Arnaldo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/14] bpf: Add trampolines to kallsyms
  2020-02-12 11:10     ` Jiri Olsa
@ 2020-02-12 16:33       ` Andrii Nakryiko
  2020-02-12 22:58         ` Jiri Olsa
  2020-02-12 23:02         ` Jiri Olsa
  0 siblings, 2 replies; 36+ messages in thread
From: Andrii Nakryiko @ 2020-02-12 16:33 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Wed, Feb 12, 2020 at 3:10 AM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Tue, Feb 11, 2020 at 10:51:27AM -0800, Andrii Nakryiko wrote:
> > On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
> > >
> > > Adding trampolines to kallsyms. It's displayed as
> > >   bpf_trampoline_<ID> [bpf]
> > >
> > > where ID is the BTF id of the trampoline function.
> > >
> > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > > ---
> > >  include/linux/bpf.h     |  2 ++
> > >  kernel/bpf/trampoline.c | 23 +++++++++++++++++++++++
> > >  2 files changed, 25 insertions(+)
> > >
> > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > index 7a4626c8e747..b91bac10d3ea 100644
> > > --- a/include/linux/bpf.h
> > > +++ b/include/linux/bpf.h
> > > @@ -502,6 +502,7 @@ struct bpf_trampoline {
> > >         /* Executable image of trampoline */
> > >         void *image;
> > >         u64 selector;
> > > +       struct bpf_ksym ksym;
> > >  };
> > >
> > >  #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
> > > @@ -573,6 +574,7 @@ struct bpf_image {
> > >  #define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
> > >  bool is_bpf_image_address(unsigned long address);
> > >  void *bpf_image_alloc(void);
> > > +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
> > >  /* Called only from code, so there's no need for stubs. */
> > >  void bpf_ksym_add(struct bpf_ksym *ksym);
> > >  void bpf_ksym_del(struct bpf_ksym *ksym);
> > > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > > index 6b264a92064b..1ee29907cbe5 100644
> > > --- a/kernel/bpf/trampoline.c
> > > +++ b/kernel/bpf/trampoline.c
> > > @@ -96,6 +96,15 @@ bool is_bpf_image_address(unsigned long addr)
> > >         return ret;
> > >  }
> > >
> > > +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
> > > +{
> > > +       struct bpf_image *image = container_of(data, struct bpf_image, data);
> > > +
> > > +       ksym->start = (unsigned long) image;
> > > +       ksym->end = ksym->start + PAGE_SIZE;
> >
> > this seems wrong, use BPF_IMAGE_SIZE instead of PAGE_SIZE?
>
> BPF_IMAGE_SIZE is the size of the data portion of the image,
> which is PAGE_SIZE - sizeof(struct bpf_image)
>
> here we want to account the whole size = data + tree node (struct bpf_image)

Why? Seems like the main use case for this is resolve IP to symbol
(function, dispatcher, trampoline, bpf program, etc). For this
purpose, you only need part of trampoline actually containing
executable code?

Also, for bpf_dispatcher in later patch, you are not including struct
bpf_dispatcher itself, you only include image, so if the idea is to
include all the code and supporting data structures, that already
failed for bpf_dispatcher (and can't even work for that case, due to
dispatcher and image not being part of the same blob of memory, so
you'll need two symbols).

So I guess it would be good to be clear on why we include these
symbols and not mix data and executable parts.

>
> >
> > > +       bpf_ksym_add(ksym);
> > > +}
> > > +
> > >  struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> > >  {
> > >         struct bpf_trampoline *tr;
> > > @@ -131,6 +140,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> > >         for (i = 0; i < BPF_TRAMP_MAX; i++)
> > >                 INIT_HLIST_HEAD(&tr->progs_hlist[i]);
> > >         tr->image = image;
> > > +       INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
> > >  out:
> > >         mutex_unlock(&trampoline_mutex);
> > >         return tr;
> > > @@ -267,6 +277,15 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
> > >         }
> > >  }
> > >
> > > +static void bpf_trampoline_kallsyms_add(struct bpf_trampoline *tr)
> > > +{
> > > +       struct bpf_ksym *ksym = &tr->ksym;
> > > +
> > > +       snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu",
> > > +                tr->key & ((u64) (1LU << 32) - 1));
> >
> > why the 32-bit truncation? also, wouldn't it be more trivial as (u32)tr->key?
>
> tr->key can have the target prog id in upper 32 bits,

True, but not clear why it's bad? It's not a security concern, because
those IDs are already exposed (you can dump them from bpftool). On the
other hand, by cutting out part of key, you make symbols potentially
ambiguous, with different trampolines marked with the same name in
kallsyms, which is just going to be confusing to users/tools.

> I'll try to use the casting as you suggest
>
> >
> > > +       bpf_image_ksym_add(tr->image, &tr->ksym);
> > > +}
> > > +
> > >  int bpf_trampoline_link_prog(struct bpf_prog *prog)
> > >  {
> > >         enum bpf_tramp_prog_type kind;
> > > @@ -311,6 +330,8 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog)
> > >         if (err) {
> > >                 hlist_del(&prog->aux->tramp_hlist);
> > >                 tr->progs_cnt[kind]--;
> > > +       } else if (cnt == 0) {
> > > +               bpf_trampoline_kallsyms_add(tr);
> >
> > You didn't handle BPF_TRAMP_REPLACE case above.
>
> ugh, right.. will add
>
> >
> > Also this if (err) { ... } else if (cnt == 0) { } pattern is a bit
> > convoluted. How about:
> >
> > if (err) {
> >    ... whatever ...
> >    goto out;
> > }
> > if (cnt == 0) { ... }
>
> yep, that's better
>
> >
> > >         }
> > >  out:
> > >         mutex_unlock(&tr->mutex);
> > > @@ -336,6 +357,8 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
> > >         }
> > >         hlist_del(&prog->aux->tramp_hlist);
> > >         tr->progs_cnt[kind]--;
> > > +       if (!(tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]))
> > > +               bpf_ksym_del(&tr->ksym);
> >
> > same, BPF_TRAMP_REPLACE case. I'd also introduce cnt for consistency
> > with bpf_trampoline_link_prog?
>
> ok, thanks a lot for comments

sure, you are welcome :)

>
> jirka
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-12 13:31         ` Arnaldo Carvalho de Melo
@ 2020-02-12 22:40           ` Jiri Olsa
  0 siblings, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 22:40 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Björn Töpel, Jiri Olsa, Alexei Starovoitov,
	Daniel Borkmann, Netdev, bpf, Andrii Nakryiko, Yonghong Song,
	Song Liu, Martin KaFai Lau, Jakub Kicinski, David Miller,
	Björn Töpel, John Fastabend, Jesper Dangaard Brouer

On Wed, Feb 12, 2020 at 10:31:25AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Wed, Feb 12, 2020 at 12:13:46PM +0100, Jiri Olsa escreveu:
> > On Tue, Feb 11, 2020 at 04:32:23PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Historically vmlinux was preferred because it contains function sizes,
> > > but with all these out of the blue symbols, we need to prefer starting
> > > with /proc/kallsyms and, as we do now, continue getting updates via
> > > PERF_RECORD_KSYMBOL.
> 
> > > Humm, but then trampolines don't generate that, right? Or does it? If it
> > > doesn't, then we will know about just the trampolines in place when the
> > > record/top session starts, reparsing /proc/kallsyms periodically seems
> > > excessive?
> 
> > I plan to extend the KSYMBOL interface to contain trampolines/dispatcher
> > data,
> 
> That seems like the sensible, without looking too much at all the
> details, to do, yes.
> 
> > plus we could do some intelligent fallback to /proc/kallsyms in case
> > vmlinux won't have anything
> 
> At this point what would be the good reason to prefer vmlinux instead of
> going straight to using /proc/kallsyms?

symbol (with sizes) and code for dwarf unwind, processor trace

jirka

> 
> We have support for taking a snapshot of it at 'perf top' start, i.e.
> right at the point we need to resolve a kernel symbol, then we get
> PERF_RECORD_KSYMBOL for things that gets in place after that.
> 
> And as well we save it to the build-id cache so that later, at 'perf
> report/script' time we can resolve kernel symbols, etc.
> 
> vmlinux is just what is in there right before boot, after that, for
> quite some time, _lots_ of stuff happens :-)
> 
> - Arnaldo
> 


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/14] bpf: Add trampolines to kallsyms
  2020-02-12 16:33       ` Andrii Nakryiko
@ 2020-02-12 22:58         ` Jiri Olsa
  2020-02-12 23:02         ` Jiri Olsa
  1 sibling, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 22:58 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Wed, Feb 12, 2020 at 08:33:49AM -0800, Andrii Nakryiko wrote:
> On Wed, Feb 12, 2020 at 3:10 AM Jiri Olsa <jolsa@redhat.com> wrote:
> >
> > On Tue, Feb 11, 2020 at 10:51:27AM -0800, Andrii Nakryiko wrote:
> > > On Sat, Feb 8, 2020 at 7:43 AM Jiri Olsa <jolsa@kernel.org> wrote:
> > > >
> > > > Adding trampolines to kallsyms. It's displayed as
> > > >   bpf_trampoline_<ID> [bpf]
> > > >
> > > > where ID is the BTF id of the trampoline function.
> > > >
> > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > > > ---
> > > >  include/linux/bpf.h     |  2 ++
> > > >  kernel/bpf/trampoline.c | 23 +++++++++++++++++++++++
> > > >  2 files changed, 25 insertions(+)
> > > >
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index 7a4626c8e747..b91bac10d3ea 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -502,6 +502,7 @@ struct bpf_trampoline {
> > > >         /* Executable image of trampoline */
> > > >         void *image;
> > > >         u64 selector;
> > > > +       struct bpf_ksym ksym;
> > > >  };
> > > >
> > > >  #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
> > > > @@ -573,6 +574,7 @@ struct bpf_image {
> > > >  #define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
> > > >  bool is_bpf_image_address(unsigned long address);
> > > >  void *bpf_image_alloc(void);
> > > > +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
> > > >  /* Called only from code, so there's no need for stubs. */
> > > >  void bpf_ksym_add(struct bpf_ksym *ksym);
> > > >  void bpf_ksym_del(struct bpf_ksym *ksym);
> > > > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > > > index 6b264a92064b..1ee29907cbe5 100644
> > > > --- a/kernel/bpf/trampoline.c
> > > > +++ b/kernel/bpf/trampoline.c
> > > > @@ -96,6 +96,15 @@ bool is_bpf_image_address(unsigned long addr)
> > > >         return ret;
> > > >  }
> > > >
> > > > +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
> > > > +{
> > > > +       struct bpf_image *image = container_of(data, struct bpf_image, data);
> > > > +
> > > > +       ksym->start = (unsigned long) image;
> > > > +       ksym->end = ksym->start + PAGE_SIZE;
> > >
> > > this seems wrong, use BPF_IMAGE_SIZE instead of PAGE_SIZE?
> >
> > BPF_IMAGE_SIZE is the size of the data portion of the image,
> > which is PAGE_SIZE - sizeof(struct bpf_image)
> >
> > here we want to account the whole size = data + tree node (struct bpf_image)
> 
> Why? Seems like the main use case for this is resolve IP to symbol
> (function, dispatcher, trampoline, bpf program, etc). For this
> purpose, you only need part of trampoline actually containing
> executable code?

right, executable code is enough for perf to resolve the symbol

> 
> Also, for bpf_dispatcher in later patch, you are not including struct
> bpf_dispatcher itself, you only include image, so if the idea is to
> include all the code and supporting data structures, that already
> failed for bpf_dispatcher (and can't even work for that case, due to
> dispatcher and image not being part of the same blob of memory, so
> you'll need two symbols).
> 
> So I guess it would be good to be clear on why we include these
> symbols and not mix data and executable parts.

ok it should be only the executable part then, there's more
on the data side that wasn't included and we don't need it

thanks,
jirka


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/14] bpf: Add trampolines to kallsyms
  2020-02-12 16:33       ` Andrii Nakryiko
  2020-02-12 22:58         ` Jiri Olsa
@ 2020-02-12 23:02         ` Jiri Olsa
  1 sibling, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-12 23:02 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Networking, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Wed, Feb 12, 2020 at 08:33:49AM -0800, Andrii Nakryiko wrote:

SNIP

> > > >         tr->image = image;
> > > > +       INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
> > > >  out:
> > > >         mutex_unlock(&trampoline_mutex);
> > > >         return tr;
> > > > @@ -267,6 +277,15 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
> > > >         }
> > > >  }
> > > >
> > > > +static void bpf_trampoline_kallsyms_add(struct bpf_trampoline *tr)
> > > > +{
> > > > +       struct bpf_ksym *ksym = &tr->ksym;
> > > > +
> > > > +       snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu",
> > > > +                tr->key & ((u64) (1LU << 32) - 1));
> > >
> > > why the 32-bit truncation? also, wouldn't it be more trivial as (u32)tr->key?
> >
> > tr->key can have the target prog id in upper 32 bits,
> 
> True, but not clear why it's bad? It's not a security concern, because
> those IDs are already exposed (you can dump them from bpftool). On the
> other hand, by cutting out part of key, you make symbols potentially
> ambiguous, with different trampolines marked with the same name in
> kallsyms, which is just going to be confusing to users/tools.

ugh ok, I did not see the target bpf program case clearly,
will include the whole tr->key

thanks,
jirka


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms
  2020-02-10 15:51 ` [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Björn Töpel
  2020-02-10 16:17   ` Jiri Olsa
@ 2020-02-13 16:23   ` Jiri Olsa
  1 sibling, 0 replies; 36+ messages in thread
From: Jiri Olsa @ 2020-02-13 16:23 UTC (permalink / raw)
  To: Björn Töpel
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Netdev, bpf,
	Andrii Nakryiko, Yonghong Song, Song Liu, Martin KaFai Lau,
	Jakub Kicinski, David Miller, Björn Töpel,
	John Fastabend, Jesper Dangaard Brouer

On Mon, Feb 10, 2020 at 04:51:08PM +0100, Björn Töpel wrote:
> On Sat, 8 Feb 2020 at 16:42, Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > hi,
> > this patchset adds trampoline and dispatcher objects
> > to be visible in /proc/kallsyms. The last patch also
> > adds sorting for all bpf objects in /proc/kallsyms.
> >
> 
> Thanks for working on this!
> 
> I'm probably missing something with my perf setup; I've applied your
> patches, and everything seem to work fine from an kallsyms
> perspective:
> 
> # grep bpf_dispatcher_xdp /proc/kallsyms
> ...
> ffffffffc0511000 t bpf_dispatcher_xdp   [bpf]
> 
> However, when I run
> # perf top
> 
> I still see the undecorated one:
> 0.90%  [unknown]                  [k] 0xffffffffc0511037
> 
> Any ideas?

heya,
the code is little rusty and needs some fixing :-\

with the patch below on top of Arnaldo's perf/urgent branch,
there's one workaround for now:

  # perf record --vmlinux /proc/kallsyms 
  ^C[ perf record: Woken up 0 times to write data ]
  [ perf record: Captured and wrote 18.954 MB perf.data (348693 samples) ]

  # perf report --kallsyms /proc/kallsyms | grep bpf_trampoline_13795
     0.01%  sched-messaging  kallsyms                                [k] bpf_trampoline_13795
     0.00%  perf             kallsyms                                [k] bpf_trampoline_13795
     0.00%  :47547           kallsyms                                [k] bpf_trampoline_13795
     0.00%  :47546           kallsyms                                [k] bpf_trampoline_13795
     0.00%  :47544           kallsyms                                [k] bpf_trampoline_13795

with recent kcore/vmlinux changes we neglected kallsyms fallback,
I'm preparing changes that will detect and use it automatically

jirka


---
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb5c2cd44d30..463ada5117f8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -742,6 +742,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
 		map->start = event->ksymbol.addr;
 		map->end = map->start + event->ksymbol.len;
 		maps__insert(&machine->kmaps, map);
+		dso__set_loaded(dso);
 	}
 
 	sym = symbol__new(map->map_ip(map, map->start),


^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, back to index

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-08 15:41 [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Jiri Olsa
2020-02-08 15:41 ` [PATCH 01/14] x86/mm: Rename is_kernel_text to __is_kernel_text Jiri Olsa
2020-02-08 15:41 ` [PATCH 02/14] bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER Jiri Olsa
2020-02-08 15:41 ` [PATCH 03/14] bpf: Add struct bpf_ksym Jiri Olsa
2020-02-08 15:41 ` [PATCH 04/14] bpf: Add name to " Jiri Olsa
2020-02-08 15:42 ` [PATCH 05/14] bpf: Add lnode list node " Jiri Olsa
2020-02-08 15:42 ` [PATCH 06/14] bpf: Add bpf_kallsyms_tree tree Jiri Olsa
2020-02-11 18:21   ` Andrii Nakryiko
2020-02-12 10:49     ` Jiri Olsa
2020-02-08 15:42 ` [PATCH 07/14] bpf: Move bpf_tree add/del from bpf_prog_ksym_node_add/del Jiri Olsa
2020-02-08 15:42 ` [PATCH 08/14] bpf: Separate kallsyms add/del functions Jiri Olsa
2020-02-08 15:42 ` [PATCH 09/14] bpf: Add bpf_ksym_add/del functions Jiri Olsa
2020-02-08 15:42 ` [PATCH 10/14] bpf: Re-initialize lnode in bpf_ksym_del Jiri Olsa
2020-02-11 18:28   ` Andrii Nakryiko
2020-02-12 10:43     ` Jiri Olsa
2020-02-08 15:42 ` [PATCH 11/14] bpf: Rename bpf_tree to bpf_progs_tree Jiri Olsa
2020-02-08 15:42 ` [PATCH 12/14] bpf: Add trampolines to kallsyms Jiri Olsa
2020-02-11 18:51   ` Andrii Nakryiko
2020-02-12 11:10     ` Jiri Olsa
2020-02-12 16:33       ` Andrii Nakryiko
2020-02-12 22:58         ` Jiri Olsa
2020-02-12 23:02         ` Jiri Olsa
2020-02-08 15:42 ` [PATCH 13/14] bpf: Add dispatchers " Jiri Olsa
2020-02-11 19:03   ` Andrii Nakryiko
2020-02-12 10:52     ` Jiri Olsa
2020-02-08 15:42 ` [PATCH 14/14] bpf: Sort bpf kallsyms symbols Jiri Olsa
2020-02-11 19:12   ` Andrii Nakryiko
2020-02-10 15:51 ` [PATCH 00/14] bpf: Add trampoline and dispatcher to /proc/kallsyms Björn Töpel
2020-02-10 16:17   ` Jiri Olsa
2020-02-11 19:32     ` Arnaldo Carvalho de Melo
2020-02-12 11:13       ` Jiri Olsa
2020-02-12 13:31         ` Arnaldo Carvalho de Melo
2020-02-12 22:40           ` Jiri Olsa
2020-02-13 16:23   ` Jiri Olsa
2020-02-11 19:13 ` Arnaldo Carvalho de Melo
2020-02-12 10:46   ` Jiri Olsa

BPF Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/bpf/0 bpf/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 bpf bpf/ https://lore.kernel.org/bpf \
		bpf@vger.kernel.org
	public-inbox-index bpf

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.bpf


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git