* [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-17 12:48 ` Peter Zijlstra
2019-01-17 12:56 ` Peter Zijlstra
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 2/9] sync tools/include/uapi/linux/perf_event.h Song Liu
` (7 subsequent siblings)
8 siblings, 2 replies; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Song Liu, peterz, acme, ast, daniel, kernel-team, dsahern
For better performance analysis of dynamically JITed and loaded kernel
functions, such as BPF programs, this patch introduces
PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
register/unregister information to user space.
The following data structure is used for PERF_RECORD_KSYMBOL.
/*
* struct {
* struct perf_event_header header;
* u64 addr;
* u32 len;
* u16 ksym_type;
* u16 flags;
* char name[];
* struct sample_id sample_id;
* };
*/
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
include/linux/perf_event.h | 13 +++++
include/uapi/linux/perf_event.h | 26 ++++++++-
kernel/events/core.c | 98 ++++++++++++++++++++++++++++++++-
3 files changed, 135 insertions(+), 2 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1d5c551a5add..77b2560f2dc7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1113,6 +1113,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
}
extern void perf_event_mmap(struct vm_area_struct *vma);
+
+/* callback function to generate ksymbol name */
+typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
+extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
+ bool unregister,
+ perf_ksymbol_get_name_f get_name, void *data);
+
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1333,6 +1340,12 @@ static inline int perf_unregister_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks) { return 0; }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
+
+typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
+static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
+ bool unregister,
+ perf_ksymbol_get_name_f get_name,
+ void *data) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9de8780ac8d9..68c4da0227c5 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -372,7 +372,8 @@ struct perf_event_attr {
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
- __reserved_1 : 35;
+ ksymbol : 1, /* include ksymbol events */
+ __reserved_1 : 34;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -965,9 +966,32 @@ enum perf_event_type {
*/
PERF_RECORD_NAMESPACES = 16,
+ /*
+ * Record ksymbol register/unregister events:
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 addr;
+ * u32 len;
+ * u16 ksym_type;
+ * u16 flags;
+ * char name[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_KSYMBOL = 17,
+
PERF_RECORD_MAX, /* non-ABI */
};
+enum perf_record_ksymbol_type {
+ PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
+ PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
+ PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
+};
+
+#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
+
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MAX_CONTEXTS_PER_STACK 8
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cd13a30f732..ef27f2776999 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -385,6 +385,7 @@ static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
+static atomic_t nr_ksymbol_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event)
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
attr->comm || attr->comm_exec ||
- attr->task ||
+ attr->task || attr->ksymbol ||
attr->context_switch)
return true;
return false;
@@ -4305,6 +4306,8 @@ static void unaccount_event(struct perf_event *event)
dec = true;
if (has_branch_stack(event))
dec = true;
+ if (event->attr.ksymbol)
+ atomic_dec(&nr_ksymbol_events);
if (dec) {
if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -7650,6 +7653,97 @@ static void perf_log_throttle(struct perf_event *event, int enable)
perf_output_end(&handle);
}
+/*
+ * ksymbol register/unregister tracking
+ */
+
+struct perf_ksymbol_event {
+ const char *name;
+ int name_len;
+ struct {
+ struct perf_event_header header;
+ u64 addr;
+ u32 len;
+ u16 ksym_type;
+ u16 flags;
+ } event_id;
+};
+
+static int perf_event_ksymbol_match(struct perf_event *event)
+{
+ return event->attr.ksymbol;
+}
+
+static void perf_event_ksymbol_output(struct perf_event *event, void *data)
+{
+ struct perf_ksymbol_event *ksymbol_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ if (!perf_event_ksymbol_match(event))
+ return;
+
+ perf_event_header__init_id(&ksymbol_event->event_id.header,
+ &sample, event);
+ ret = perf_output_begin(&handle, event,
+ ksymbol_event->event_id.header.size);
+ if (ret)
+ return;
+
+ perf_output_put(&handle, ksymbol_event->event_id);
+ __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
+void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
+ perf_ksymbol_get_name_f get_name, void *data)
+{
+ struct perf_ksymbol_event ksymbol_event;
+ char name[KSYM_NAME_LEN];
+ u16 flags = 0;
+ int name_len;
+
+ if (!atomic_read(&nr_ksymbol_events))
+ return;
+
+ if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
+ ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
+ goto err;
+
+ get_name(name, KSYM_NAME_LEN, data);
+ name_len = strlen(name) + 1;
+ while (!IS_ALIGNED(name_len, sizeof(u64)))
+ name[name_len++] = '\0';
+ BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
+
+ if (unregister)
+ flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
+
+ ksymbol_event = (struct perf_ksymbol_event){
+ .name = name,
+ .name_len = name_len,
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = sizeof(ksymbol_event.event_id) +
+ name_len,
+ },
+ .addr = addr,
+ .len = len,
+ .ksym_type = ksym_type,
+ .flags = flags,
+ },
+ };
+
+ perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
+ return;
+err:
+ WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
+}
+
void perf_event_itrace_started(struct perf_event *event)
{
event->attach_state |= PERF_ATTACH_ITRACE;
@@ -9900,6 +9994,8 @@ static void account_event(struct perf_event *event)
inc = true;
if (is_cgroup_event(event))
inc = true;
+ if (event->attr.ksymbol)
+ atomic_inc(&nr_ksymbol_events);
if (inc) {
/*
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL Song Liu
@ 2019-01-17 12:48 ` Peter Zijlstra
2019-01-17 12:56 ` Peter Zijlstra
1 sibling, 0 replies; 20+ messages in thread
From: Peter Zijlstra @ 2019-01-17 12:48 UTC (permalink / raw)
To: Song Liu; +Cc: linux-kernel, netdev, acme, ast, daniel, kernel-team, dsahern
On Wed, Jan 16, 2019 at 08:29:23AM -0800, Song Liu wrote:
> +/* callback function to generate ksymbol name */
> +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
> +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
> + bool unregister,
> + perf_ksymbol_get_name_f get_name, void *data);
> +
I'm not liking the getname thing.. I know that's what BPF does, but can
we please do that in the caller or something?
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1114,11 +1114,8 @@ static inline void perf_event_task_sched
extern void perf_event_mmap(struct vm_area_struct *vma);
-/* callback function to generate ksymbol name */
-typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
- bool unregister,
- perf_ksymbol_get_name_f get_name, void *data);
+ bool unregister, const char *name);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1341,11 +1338,8 @@ static inline int perf_unregister_guest_
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
-typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
- bool unregister,
- perf_ksymbol_get_name_f get_name,
- void *data) { }
+ bool unregister, const char *name) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7698,8 +7698,7 @@ static void perf_event_ksymbol_output(st
perf_output_end(&handle);
}
-void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
- perf_ksymbol_get_name_f get_name, void *data)
+void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, const char *sym)
{
struct perf_ksymbol_event ksymbol_event;
char name[KSYM_NAME_LEN];
@@ -7713,7 +7712,7 @@ void perf_event_ksymbol(u16 ksym_type, u
ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
goto err;
- get_name(name, KSYM_NAME_LEN, data);
+ strlcpy(name, sym, KSYM_NAME_LEN);
name_len = strlen(name) + 1;
while (!IS_ALIGNED(name_len, sizeof(u64)))
name[name_len++] = '\0';
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL Song Liu
2019-01-17 12:48 ` Peter Zijlstra
@ 2019-01-17 12:56 ` Peter Zijlstra
2019-01-17 14:49 ` Song Liu
2019-01-18 8:41 ` Peter Zijlstra
1 sibling, 2 replies; 20+ messages in thread
From: Peter Zijlstra @ 2019-01-17 12:56 UTC (permalink / raw)
To: Song Liu
Cc: linux-kernel, netdev, acme, ast, daniel, kernel-team, dsahern,
Steven Rostedt
[-- Attachment #1: Type: text/plain, Size: 1620 bytes --]
On Wed, Jan 16, 2019 at 08:29:23AM -0800, Song Liu wrote:
> For better performance analysis of dynamically JITed and loaded kernel
> functions, such as BPF programs, this patch introduces
> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
> register/unregister information to user space.
>
> The following data structure is used for PERF_RECORD_KSYMBOL.
>
> /*
> * struct {
> * struct perf_event_header header;
> * u64 addr;
> * u32 len;
> * u16 ksym_type;
> * u16 flags;
> * char name[];
> * struct sample_id sample_id;
> * };
> */
So I've cobbled together the attached patches to see how it would work
out..
I didn't convert ftrace trampolines; because ftrace code has this
uncanny ability to make my head hurt. But I don't think it should be
hard, once one figures out the right structure to stick that
kallsym_node thing in (ftrace_ops ?!).
It is compiled only, so no testing what so ever (also, no changelogs).
I didn't wire up the KSYM_TYPE thing; I'm wondering if we really need
that, OTOH it really doesn't hurt having it either.
One weird thing I noticed, wth does bpf_prog_kallsyms_add() check
CAP_ADMIN ?! Surely even a non-priv JIT'ed program generates symbols,
why hide those?
Anyway; with the one nit about the get_names() thing sorted:
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
(thanks for sticking with this)
[-- Attachment #2: peterz-latch-next.patch --]
[-- Type: text/x-diff, Size: 1812 bytes --]
Subject:
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu Jan 17 11:41:01 CET 2019
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/rbtree_latch.h | 48 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -211,4 +211,52 @@ latch_tree_find(void *key, struct latch_
return node;
}
+static __always_inline struct latch_tree_node *
+latch_tree_first(struct latch_tree_root *root)
+{
+ struct latch_tree_node *ltn = NULL;
+ struct rb_node *node;
+ unsigned int seq;
+
+ do {
+ struct rb_root *rbr;
+
+ seq = raw_read_seqcount_latch(&root->seq);
+ rbr = &root->tree[seq & 1];
+ node = rb_first(rbr);
+ } while (read_seqcount_retry(&root->seq, seq));
+
+ if (node)
+ ltn = __lt_from_rb(node, seq & 1);
+
+ return ltn;
+}
+
+/**
+ * latch_tree_next() - find the next @ltn in @root per sort order
+ * @root: trees to which @ltn belongs
+ * @ltn: nodes to start from
+ *
+ * Does a lockless lookup in the trees @root for the next node starting at
+ * @ltn.
+ *
+ * Using this function outside of the write side lock is rather dodgy but given
+ * latch_tree_erase() doesn't re-init the nodes and the whole iteration is done
+ * under a single RCU critical section, it should be non-fatal and generate some
+ * semblance of order - albeit possibly missing chunks of the tree.
+ */
+static __always_inline struct latch_tree_node *
+latch_tree_next(struct latch_tree_root *root, struct latch_tree_node *ltn)
+{
+ struct rb_node *node;
+ unsigned int seq;
+
+ do {
+ seq = raw_read_seqcount_latch(&root->seq);
+ node = rb_next(&ltn->node[seq & 1]);
+ } while (read_seqcount_retry(&root->seq, seq));
+
+ return __lt_from_rb(node, seq & 1);
+}
+
#endif /* RB_TREE_LATCH_H */
[-- Attachment #3: peterz-kallsym.patch --]
[-- Type: text/x-diff, Size: 7531 bytes --]
Subject:
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu Jan 17 11:18:21 CET 2019
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/kallsyms.h | 14 +++
kernel/extable.c | 2
kernel/kallsyms.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 202 insertions(+), 1 deletion(-)
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -11,6 +11,7 @@
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/rbtree_latch.h>
#include <asm/sections.h>
@@ -20,6 +21,19 @@
struct module;
+struct kallsym_node
+{
+ struct latch_tree_node kn_node;
+ unsigned long kn_addr;
+ unsigned long kn_len;
+ void (*kn_names)(struct kallsym_node *kn, char *sym_name, char **mod_name);
+};
+
+extern void kallsym_tree_add(struct kallsym_node *kn);
+extern void kallsym_tree_del(struct kallsym_node *kn);
+
+extern bool is_kallsym_tree_text_address(unsigned long addr);
+
static inline int is_kernel_inittext(unsigned long addr)
{
if (addr >= (unsigned long)_sinittext
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -145,6 +145,8 @@ int kernel_text_address(unsigned long ad
if (is_module_text_address(addr))
goto out;
+ if (is_kallsym_tree_text_address(addr))
+ goto out;
if (is_ftrace_trampoline(addr))
goto out;
if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -24,6 +24,8 @@
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/perf_event.h>
/*
* These will be re-linked against their real values
@@ -48,6 +50,164 @@ extern const u16 kallsyms_token_index[]
extern const unsigned int kallsyms_markers[] __weak;
+static DEFINE_SPINLOCK(kallsym_lock);
+static struct latch_tree_root kallsym_tree __cacheline_aligned;
+
+static __always_inline unsigned long
+kallsym_node_addr(struct latch_tree_node *node)
+{
+ return ((struct kallsym_node *)node)->kn_addr;
+}
+
+static __always_inline bool kallsym_tree_less(struct latch_tree_node *a,
+ struct latch_tree_node *b)
+{
+ return kallsym_node_addr(a) < kallsym_node_addr(b);
+}
+
+static __always_inline int kallsym_tree_comp(void *key,
+ struct latch_tree_node *n)
+{
+ unsigned long val = (unsigned long)key;
+ unsigned long sym_start, sym_end;
+ const struct kallsym_node *kn;
+
+ kn = container_of(n, struct kallsym_node, kn_node);
+ sym_start = kn->kn_addr;
+ sym_end = sym_start + kn->kn_len;
+
+ if (val < sym_start)
+ return -1;
+ if (val >= sym_end)
+ return 1;
+
+ return 0;
+}
+
+static const struct latch_tree_ops kallsym_tree_ops = {
+ .less = kallsym_tree_less,
+ .comp = kallsym_tree_comp,
+};
+
+void kallsym_tree_add(struct kallsym_node *kn)
+{
+ char namebuf[KSYM_NAME_LEN] = "";
+ char *modname = NULL;
+
+ spin_lock_irq(&kallsym_lock);
+ latch_tree_insert(&kn->kn_node, &kallsym_tree, &kallsym_tree_ops);
+ spin_unlock_irq(&kallsym_lock);
+
+ kn->kn_names(kn, namebuf, &modname);
+
+ if (modname) {
+ int len = strlen(namebuf);
+
+ snprintf(namebuf + len, sizeof(namebuf) - len, " [%s]", modname);
+ }
+
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_UNKNOWN,
+ kn->kn_addr, kn->kn_len, false, namebuf);
+}
+
+void kallsym_tree_del(struct kallsym_node *kn)
+{
+ char namebuf[KSYM_NAME_LEN] = "";
+ char *modname = NULL;
+
+ kn->kn_names(kn, namebuf, &modname);
+
+ if (modname) {
+ int len = strlen(namebuf);
+
+ snprintf(namebuf + len, sizeof(namebuf) - len, " [%s]", modname);
+ }
+
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_UNKNOWN,
+ kn->kn_addr, kn->kn_len, true, namebuf);
+
+ spin_lock_irq(&kallsym_lock);
+ latch_tree_erase(&kn->kn_node, &kallsym_tree, &kallsym_tree_ops);
+ spin_unlock_irq(&kallsym_lock);
+}
+
+static struct kallsym_node *kallsym_tree_find(unsigned long addr)
+{
+ struct kallsym_node *kn = NULL;
+ struct latch_tree_node *n;
+
+ n = latch_tree_find((void *)addr, &kallsym_tree, &kallsym_tree_ops);
+ if (n)
+ kn = container_of(n, struct kallsym_node, kn_node);
+
+ return kn;
+}
+
+static char *kallsym_tree_address_lookup(unsigned long addr, unsigned long *size,
+ unsigned long *off, char **modname, char *sym)
+{
+ struct kallsym_node *kn;
+ char *ret = NULL;
+
+ rcu_read_lock();
+ kn = kallsym_tree_find(addr);
+ if (kn) {
+ kn->kn_names(kn, sym, modname);
+
+ ret = sym;
+ if (size)
+ *size = kn->kn_len;
+ if (off)
+ *off = addr - kn->kn_addr;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+bool is_kallsym_tree_text_address(unsigned long addr)
+{
+ bool ret;
+
+ rcu_read_lock();
+ ret = kallsym_tree_find(addr) != NULL;
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int kallsym_tree_kallsym(unsigned int symnum, unsigned long *value, char *type,
+ char *sym, char *modname, int *exported)
+{
+ struct latch_tree_node *ltn;
+ int i, ret = -ERANGE;
+
+ rcu_read_lock();
+ for (i = 0, ltn = latch_tree_first(&kallsym_tree); i < symnum && ltn;
+ i++, ltn = latch_tree_next(&kallsym_tree, ltn))
+ ;
+
+ if (ltn) {
+ struct kallsym_node *kn;
+ char *mod;
+
+ kn = container_of(ltn, struct kallsym_node, kn_node);
+
+ kn->kn_names(kn, sym, &mod);
+ if (mod)
+ strlcpy(modname, mod, MODULE_NAME_LEN);
+ else
+ modname[0] = '\0';
+
+ *type = 't';
+ *exported = 0;
+ ret = 0;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
/*
* Expand a compressed symbol data into the resulting uncompressed string,
* if uncompressed string is too long (>= maxlen), it will be truncated,
@@ -265,6 +425,7 @@ int kallsyms_lookup_size_offset(unsigned
if (is_ksym_addr(addr))
return !!get_symbol_pos(addr, symbolsize, offset);
return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
+ !!kallsym_tree_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
!!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
}
@@ -301,6 +462,10 @@ const char *kallsyms_lookup(unsigned lon
ret = module_address_lookup(addr, symbolsize, offset,
modname, namebuf);
if (!ret)
+ ret = kallsym_tree_address_lookup(addr, symbolsize,
+ offset, modname, namebuf);
+
+ if (!ret)
ret = bpf_address_lookup(addr, symbolsize,
offset, modname, namebuf);
@@ -434,6 +599,7 @@ struct kallsym_iter {
loff_t pos;
loff_t pos_arch_end;
loff_t pos_mod_end;
+ loff_t pos_tree_end;
loff_t pos_ftrace_mod_end;
unsigned long value;
unsigned int nameoff; /* If iterating in core kernel symbols. */
@@ -478,9 +644,24 @@ static int get_ksymbol_mod(struct kallsy
return 1;
}
+static int get_ksymbol_tree(struct kallsym_iter *iter)
+{
+ int ret = kallsym_tree_kallsym(iter->pos - iter->pos_mod_end,
+ &iter->value, &iter->type,
+ iter->name, iter->module_name,
+ &iter->exported);
+
+ if (ret < 0) {
+ iter->pos_tree_end = iter->pos;
+ return 0;
+ }
+
+ return 1;
+}
+
static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
{
- int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_mod_end,
+ int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_tree_end,
&iter->value, &iter->type,
iter->name, iter->module_name,
&iter->exported);
@@ -545,6 +726,10 @@ static int update_iter_mod(struct kallsy
get_ksymbol_mod(iter))
return 1;
+ if ((!iter->pos_tree_end || iter->pos_tree_end > pos) &&
+ get_ksymbol_tree(iter))
+ return 1;
+
if ((!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > pos) &&
get_ksymbol_ftrace_mod(iter))
return 1;
[-- Attachment #4: peterz-kallsym-bpf.patch --]
[-- Type: text/x-diff, Size: 9920 bytes --]
Subject:
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu Jan 17 13:19:25 CET 2019
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/bpf.h | 7 +-
include/linux/filter.h | 42 ------------
kernel/bpf/core.c | 164 ++++---------------------------------------------
kernel/extable.c | 4 -
kernel/kallsyms.c | 19 -----
5 files changed, 22 insertions(+), 214 deletions(-)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -13,7 +13,7 @@
#include <linux/file.h>
#include <linux/percpu.h>
#include <linux/err.h>
-#include <linux/rbtree_latch.h>
+#include <linux/kallsyms.h>
#include <linux/numa.h>
#include <linux/wait.h>
@@ -307,8 +307,9 @@ struct bpf_prog_aux {
bool offload_requested;
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
- struct latch_tree_node ksym_tnode;
- struct list_head ksym_lnode;
+
+ struct kallsym_node ktn;
+
const struct bpf_prog_ops *ops;
struct bpf_map **used_maps;
struct bpf_prog *prog;
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -932,23 +932,6 @@ static inline bool bpf_jit_kallsyms_enab
return false;
}
-const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char *sym);
-bool is_bpf_text_address(unsigned long addr);
-int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
- char *sym);
-
-static inline const char *
-bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char **modname, char *sym)
-{
- const char *ret = __bpf_address_lookup(addr, size, off, sym);
-
- if (ret && modname)
- *modname = NULL;
- return ret;
-}
-
void bpf_prog_kallsyms_add(struct bpf_prog *fp);
void bpf_prog_kallsyms_del(struct bpf_prog *fp);
@@ -974,31 +957,6 @@ static inline bool bpf_jit_kallsyms_enab
return false;
}
-static inline const char *
-__bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char *sym)
-{
- return NULL;
-}
-
-static inline bool is_bpf_text_address(unsigned long addr)
-{
- return false;
-}
-
-static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
- char *type, char *sym)
-{
- return -ERANGE;
-}
-
-static inline const char *
-bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char **modname, char *sym)
-{
- return NULL;
-}
-
static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
}
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -30,7 +30,6 @@
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/frame.h>
-#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
@@ -100,8 +99,6 @@ struct bpf_prog *bpf_prog_alloc(unsigned
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
- INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
-
return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_alloc);
@@ -530,86 +527,35 @@ static void bpf_get_prog_name(const stru
*sym = 0;
}
-static __always_inline unsigned long
-bpf_get_prog_addr_start(struct latch_tree_node *n)
-{
- unsigned long symbol_start, symbol_end;
- const struct bpf_prog_aux *aux;
-
- aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
-
- return symbol_start;
-}
-
-static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
- struct latch_tree_node *b)
-{
- return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
-}
-
-static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
-{
- unsigned long val = (unsigned long)key;
- unsigned long symbol_start, symbol_end;
- const struct bpf_prog_aux *aux;
-
- aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
-
- if (val < symbol_start)
- return -1;
- if (val >= symbol_end)
- return 1;
-
- return 0;
-}
-
-static const struct latch_tree_ops bpf_tree_ops = {
- .less = bpf_tree_less,
- .comp = bpf_tree_comp,
-};
-
-static DEFINE_SPINLOCK(bpf_lock);
-static LIST_HEAD(bpf_kallsyms);
-static struct latch_tree_root bpf_tree __cacheline_aligned;
-
-static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
-{
- WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
- list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
- latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
-}
-
-static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
-{
- if (list_empty(&aux->ksym_lnode))
- return;
-
- latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
- list_del_rcu(&aux->ksym_lnode);
-}
static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
{
return fp->jited && !bpf_prog_was_classic(fp);
}
-static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
+static void bpf_kn_names(struct kallsym_node *kn, char *sym, char **modname)
{
- return list_empty(&fp->aux->ksym_lnode) ||
- fp->aux->ksym_lnode.prev == LIST_POISON2;
+ struct bpf_prog_aux *aux = container_of(kn, struct bpf_prog_aux, ktn);
+
+ *modname = "eBPF-jit";
+ bpf_get_prog_name(aux->prog, sym);
}
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
+ unsigned long sym_start, sym_end;
+
if (!bpf_prog_kallsyms_candidate(fp) ||
!capable(CAP_SYS_ADMIN))
return;
- spin_lock_bh(&bpf_lock);
- bpf_prog_ksym_node_add(fp->aux);
- spin_unlock_bh(&bpf_lock);
+ bpf_get_prog_addr_region(fp, &sym_start, &sym_end);
+
+ fp->aux->ktn.kn_addr = sym_start;
+ fp->aux->ktn.kn_len = sym_end - sym_start;
+ fp->aux->ktn.kn_names = bpf_kn_names;
+
+ kallsym_tree_add(&fp->aux->ktn);
}
void bpf_prog_kallsyms_del(struct bpf_prog *fp)
@@ -617,85 +563,7 @@ void bpf_prog_kallsyms_del(struct bpf_pr
if (!bpf_prog_kallsyms_candidate(fp))
return;
- spin_lock_bh(&bpf_lock);
- bpf_prog_ksym_node_del(fp->aux);
- spin_unlock_bh(&bpf_lock);
-}
-
-static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
-{
- struct latch_tree_node *n;
-
- if (!bpf_jit_kallsyms_enabled())
- return NULL;
-
- n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
- return n ?
- container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
- NULL;
-}
-
-const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char *sym)
-{
- unsigned long symbol_start, symbol_end;
- struct bpf_prog *prog;
- char *ret = NULL;
-
- rcu_read_lock();
- prog = bpf_prog_kallsyms_find(addr);
- if (prog) {
- bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
- bpf_get_prog_name(prog, sym);
-
- ret = sym;
- if (size)
- *size = symbol_end - symbol_start;
- if (off)
- *off = addr - symbol_start;
- }
- rcu_read_unlock();
-
- return ret;
-}
-
-bool is_bpf_text_address(unsigned long addr)
-{
- bool ret;
-
- rcu_read_lock();
- ret = bpf_prog_kallsyms_find(addr) != NULL;
- rcu_read_unlock();
-
- return ret;
-}
-
-int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
- char *sym)
-{
- struct bpf_prog_aux *aux;
- unsigned int it = 0;
- int ret = -ERANGE;
-
- if (!bpf_jit_kallsyms_enabled())
- return ret;
-
- rcu_read_lock();
- list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
- if (it++ != symnum)
- continue;
-
- bpf_get_prog_name(aux->prog, sym);
-
- *value = (unsigned long)aux->prog->bpf_func;
- *type = BPF_SYM_ELF_TYPE;
-
- ret = 0;
- break;
- }
- rcu_read_unlock();
-
- return ret;
+ kallsym_tree_del(&fp->aux->ktn);
}
static atomic_long_t bpf_jit_current;
@@ -806,8 +674,6 @@ void __weak bpf_jit_free(struct bpf_prog
bpf_jit_binary_unlock_ro(hdr);
bpf_jit_binary_free(hdr);
-
- WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
}
bpf_prog_unlock_free(fp);
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -135,7 +135,7 @@ int kernel_text_address(unsigned long ad
* coming back from idle, or cpu on or offlining.
*
* is_module_text_address() as well as the kprobe slots
- * and is_bpf_text_address() require RCU to be watching.
+ * and is_kallsym_tree_text_address() require RCU to be watching.
*/
no_rcu = !rcu_is_watching();
@@ -151,8 +151,6 @@ int kernel_text_address(unsigned long ad
goto out;
if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
goto out;
- if (is_bpf_text_address(addr))
- goto out;
ret = 0;
out:
if (no_rcu)
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -425,8 +425,7 @@ int kallsyms_lookup_size_offset(unsigned
if (is_ksym_addr(addr))
return !!get_symbol_pos(addr, symbolsize, offset);
return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
- !!kallsym_tree_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
- !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
+ !!kallsym_tree_address_lookup(addr, symbolsize, offset, NULL, namebuf);
}
/*
@@ -464,11 +463,6 @@ const char *kallsyms_lookup(unsigned lon
if (!ret)
ret = kallsym_tree_address_lookup(addr, symbolsize,
offset, modname, namebuf);
-
- if (!ret)
- ret = bpf_address_lookup(addr, symbolsize,
- offset, modname, namebuf);
-
if (!ret)
ret = ftrace_mod_address_lookup(addr, symbolsize,
offset, modname, namebuf);
@@ -673,15 +667,6 @@ static int get_ksymbol_ftrace_mod(struct
return 1;
}
-static int get_ksymbol_bpf(struct kallsym_iter *iter)
-{
- iter->module_name[0] = '\0';
- iter->exported = 0;
- return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
- &iter->value, &iter->type,
- iter->name) < 0 ? 0 : 1;
-}
-
/* Returns space to next name. */
static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
{
@@ -734,7 +719,7 @@ static int update_iter_mod(struct kallsy
get_ksymbol_ftrace_mod(iter))
return 1;
- return get_ksymbol_bpf(iter);
+ return 0;
}
/* Returns false if pos at or past end of file. */
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL
2019-01-17 12:56 ` Peter Zijlstra
@ 2019-01-17 14:49 ` Song Liu
2019-01-17 14:58 ` Arnaldo Carvalho de Melo
2019-01-18 8:41 ` Peter Zijlstra
1 sibling, 1 reply; 20+ messages in thread
From: Song Liu @ 2019-01-17 14:49 UTC (permalink / raw)
To: Peter Zijlstra
Cc: lkml, Netdev, acme, ast, daniel, Kernel Team, dsahern, Steven Rostedt
> On Jan 17, 2019, at 4:56 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Jan 16, 2019 at 08:29:23AM -0800, Song Liu wrote:
>> For better performance analysis of dynamically JITed and loaded kernel
>> functions, such as BPF programs, this patch introduces
>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
>> register/unregister information to user space.
>>
>> The following data structure is used for PERF_RECORD_KSYMBOL.
>>
>> /*
>> * struct {
>> * struct perf_event_header header;
>> * u64 addr;
>> * u32 len;
>> * u16 ksym_type;
>> * u16 flags;
>> * char name[];
>> * struct sample_id sample_id;
>> * };
>> */
>
> So I've cobbled together the attached patches to see how it would work
> out..
>
> I didn't convert ftrace trampolines; because ftrace code has this
> uncanny ability to make my head hurt. But I don't think it should be
> hard, once one figures out the right structure to stick that
> kallsym_node thing in (ftrace_ops ?!).
>
> It is compiled only, so no testing what so ever (also, no changelogs).
>
> I didn't wire up the KSYM_TYPE thing; I'm wondering if we really need
> that, OTOH it really doesn't hurt having it either.
>
> One weird thing I noticed, wth does bpf_prog_kallsyms_add() check
> CAP_ADMIN ?! Surely even a non-priv JIT'ed program generates symbols,
> why hide those?
>
> Anyway; with the one nit about the get_names() thing sorted:
>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>
> (thanks for sticking with this)
> <peterz-latch-next.patch><peterz-kallsym.patch><peterz-kallsym-bpf.patch>
Aha, now I get the point on perf_event_ksymbol(). Yeah this approach is
definitely better.
While I run more tests with these patches, could we get current in
perf/core? This will enable the development of user space tools like
bcc.
Also, I currently base this set on bpf-next tree, as tip/perf/core is
4 week old. Shall I rebase the set on Linus' tree? Or shall I wait for
tip/perf/core?
Thanks again!
Song
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL
2019-01-17 14:49 ` Song Liu
@ 2019-01-17 14:58 ` Arnaldo Carvalho de Melo
2019-01-17 15:02 ` Song Liu
2019-01-18 8:38 ` Peter Zijlstra
0 siblings, 2 replies; 20+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-01-17 14:58 UTC (permalink / raw)
To: Song Liu
Cc: Peter Zijlstra, linux-kernel, Netdev, Alexei Starovoitov,
Daniel Borkmann, Kernel Team, David Ahern, Steven Rostedt
Em Thu, Jan 17, 2019 at 02:49:10PM +0000, Song Liu escreveu:
>
>
> > On Jan 17, 2019, at 4:56 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Wed, Jan 16, 2019 at 08:29:23AM -0800, Song Liu wrote:
> >> For better performance analysis of dynamically JITed and loaded kernel
> >> functions, such as BPF programs, this patch introduces
> >> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
> >> register/unregister information to user space.
> >>
> >> The following data structure is used for PERF_RECORD_KSYMBOL.
> >>
> >> /*
> >> * struct {
> >> * struct perf_event_header header;
> >> * u64 addr;
> >> * u32 len;
> >> * u16 ksym_type;
> >> * u16 flags;
> >> * char name[];
> >> * struct sample_id sample_id;
> >> * };
> >> */
> >
> > So I've cobbled together the attached patches to see how it would work
> > out..
> >
> > I didn't convert ftrace trampolines; because ftrace code has this
> > uncanny ability to make my head hurt. But I don't think it should be
> > hard, once one figures out the right structure to stick that
> > kallsym_node thing in (ftrace_ops ?!).
> >
> > It is compiled only, so no testing what so ever (also, no changelogs).
> >
> > I didn't wire up the KSYM_TYPE thing; I'm wondering if we really need
> > that, OTOH it really doesn't hurt having it either.
> >
> > One weird thing I noticed, wth does bpf_prog_kallsyms_add() check
> > CAP_ADMIN ?! Surely even a non-priv JIT'ed program generates symbols,
> > why hide those?
> >
> > Anyway; with the one nit about the get_names() thing sorted:
> >
> > Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> >
> > (thanks for sticking with this)
> > <peterz-latch-next.patch><peterz-kallsym.patch><peterz-kallsym-bpf.patch>
>
> Aha, now I get the point on perf_event_ksymbol(). Yeah this approach is
> definitely better.
>
> While I run more tests with these patches, could we get current in
> perf/core? This will enable the development of user space tools like
> bcc.
>
> Also, I currently base this set on bpf-next tree, as tip/perf/core is
> 4 week old. Shall I rebase the set on Linus' tree? Or shall I wait for
> tip/perf/core?
So, can you post one last set, this time with PeterZ's Acked-by,
assuming you're sorting out the get_names() thing, and I can try merging
this into my perf/core branch, then pushing it out to Ingo, my perf/core
starts from tip/perf/urgent, so should be new enough.
I'd then right after testing it send a pull request to Ingo, synching
everything.
- Arnaldo
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL
2019-01-17 14:58 ` Arnaldo Carvalho de Melo
@ 2019-01-17 15:02 ` Song Liu
2019-01-18 8:38 ` Peter Zijlstra
1 sibling, 0 replies; 20+ messages in thread
From: Song Liu @ 2019-01-17 15:02 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Peter Zijlstra, linux-kernel, Netdev, Alexei Starovoitov,
Daniel Borkmann, Kernel Team, David Ahern, Steven Rostedt
> On Jan 17, 2019, at 6:58 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
>
> Em Thu, Jan 17, 2019 at 02:49:10PM +0000, Song Liu escreveu:
>>
>>
>>> On Jan 17, 2019, at 4:56 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>>>
>>> On Wed, Jan 16, 2019 at 08:29:23AM -0800, Song Liu wrote:
>>>> For better performance analysis of dynamically JITed and loaded kernel
>>>> functions, such as BPF programs, this patch introduces
>>>> PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
>>>> register/unregister information to user space.
>>>>
>>>> The following data structure is used for PERF_RECORD_KSYMBOL.
>>>>
>>>> /*
>>>> * struct {
>>>> * struct perf_event_header header;
>>>> * u64 addr;
>>>> * u32 len;
>>>> * u16 ksym_type;
>>>> * u16 flags;
>>>> * char name[];
>>>> * struct sample_id sample_id;
>>>> * };
>>>> */
>>>
>>> So I've cobbled together the attached patches to see how it would work
>>> out..
>>>
>>> I didn't convert ftrace trampolines; because ftrace code has this
>>> uncanny ability to make my head hurt. But I don't think it should be
>>> hard, once one figures out the right structure to stick that
>>> kallsym_node thing in (ftrace_ops ?!).
>>>
>>> It is compiled only, so no testing what so ever (also, no changelogs).
>>>
>>> I didn't wire up the KSYM_TYPE thing; I'm wondering if we really need
>>> that, OTOH it really doesn't hurt having it either.
>>>
>>> One weird thing I noticed, wth does bpf_prog_kallsyms_add() check
>>> CAP_ADMIN ?! Surely even a non-priv JIT'ed program generates symbols,
>>> why hide those?
>>>
>>> Anyway; with the one nit about the get_names() thing sorted:
>>>
>>> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>>
>>> (thanks for sticking with this)
>>> <peterz-latch-next.patch><peterz-kallsym.patch><peterz-kallsym-bpf.patch>
>>
>> Aha, now I get the point on perf_event_ksymbol(). Yeah this approach is
>> definitely better.
>>
>> While I run more tests with these patches, could we get current in
>> perf/core? This will enable the development of user space tools like
>> bcc.
>>
>> Also, I currently base this set on bpf-next tree, as tip/perf/core is
>> 4 week old. Shall I rebase the set on Linus' tree? Or shall I wait for
>> tip/perf/core?
>
> So, can you post one last set, this time with PeterZ's Acked-by,
> assuming you're sorting out the get_names() thing, and I can try merging
> this into my perf/core branch, then pushing it out to Ingo, my perf/core
> starts from tip/perf/urgent, so should be new enough.
>
> I'd then right after testing it send a pull request to Ingo, synching
> everything.
>
> - Arnaldo
Thanks Arnaldo! I will send it soon.
Song
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL
2019-01-17 14:58 ` Arnaldo Carvalho de Melo
2019-01-17 15:02 ` Song Liu
@ 2019-01-18 8:38 ` Peter Zijlstra
1 sibling, 0 replies; 20+ messages in thread
From: Peter Zijlstra @ 2019-01-18 8:38 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Song Liu, linux-kernel, Netdev, Alexei Starovoitov,
Daniel Borkmann, Kernel Team, David Ahern, Steven Rostedt
On Thu, Jan 17, 2019 at 11:58:54AM -0300, Arnaldo Carvalho de Melo wrote:
> So, can you post one last set, this time with PeterZ's Acked-by,
> assuming you're sorting out the get_names() thing, and I can try merging
> this into my perf/core branch, then pushing it out to Ingo, my perf/core
> starts from tip/perf/urgent, so should be new enough.
Works for me; thanks Arnaldo!
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL
2019-01-17 12:56 ` Peter Zijlstra
2019-01-17 14:49 ` Song Liu
@ 2019-01-18 8:41 ` Peter Zijlstra
1 sibling, 0 replies; 20+ messages in thread
From: Peter Zijlstra @ 2019-01-18 8:41 UTC (permalink / raw)
To: Song Liu
Cc: linux-kernel, netdev, acme, ast, daniel, kernel-team, dsahern,
Steven Rostedt
On Thu, Jan 17, 2019 at 01:56:53PM +0100, Peter Zijlstra wrote:
> +static __always_inline struct latch_tree_node *
> +latch_tree_first(struct latch_tree_root *root)
> +{
> + struct latch_tree_node *ltn = NULL;
> + struct rb_node *node;
> + unsigned int seq;
> +
> + do {
> + struct rb_root *rbr;
> +
> + seq = raw_read_seqcount_latch(&root->seq);
> + rbr = &root->tree[seq & 1];
> + node = rb_first(rbr);
> + } while (read_seqcount_retry(&root->seq, seq));
> +
> + if (node)
> + ltn = __lt_from_rb(node, seq & 1);
> +
> + return ltn;
> +}
> +
> +/**
> + * latch_tree_next() - find the next @ltn in @root per sort order
> + * @root: trees to which @ltn belongs
> + * @ltn: nodes to start from
> + *
> + * Does a lockless lookup in the trees @root for the next node starting at
> + * @ltn.
> + *
> + * Using this function outside of the write side lock is rather dodgy but given
> + * latch_tree_erase() doesn't re-init the nodes and the whole iteration is done
> + * under a single RCU critical section, it should be non-fatal and generate some
> + * semblance of order - albeit possibly missing chunks of the tree.
> + */
> +static __always_inline struct latch_tree_node *
> +latch_tree_next(struct latch_tree_root *root, struct latch_tree_node *ltn)
> +{
> + struct rb_node *node;
> + unsigned int seq;
> +
> + do {
> + seq = raw_read_seqcount_latch(&root->seq);
> + node = rb_next(<n->node[seq & 1]);
> + } while (read_seqcount_retry(&root->seq, seq));
> +
> + return __lt_from_rb(node, seq & 1);
> +}
> +static int kallsym_tree_kallsym(unsigned int symnum, unsigned long *value, char *type,
> + char *sym, char *modname, int *exported)
> +{
> + struct latch_tree_node *ltn;
> + int i, ret = -ERANGE;
> +
> + rcu_read_lock();
> + for (i = 0, ltn = latch_tree_first(&kallsym_tree); i < symnum && ltn;
> + i++, ltn = latch_tree_next(&kallsym_tree, ltn))
> + ;
On second thought; I don't think this will be good enough after all.
Missing entire subtrees is too much.
The rcu-list iteration will only miss newly added symbols, and for those
we'll get the events, combined we'll still have a complete picture. Not
so when a whole subtree goes missing.
I thought I could avoid the list this way, but alas, not so.
> +
> + if (ltn) {
> + struct kallsym_node *kn;
> + char *mod;
> +
> + kn = container_of(ltn, struct kallsym_node, kn_node);
> +
> + kn->kn_names(kn, sym, &mod);
> + if (mod)
> + strlcpy(modname, mod, MODULE_NAME_LEN);
> + else
> + modname[0] = '\0';
> +
> + *type = 't';
> + *exported = 0;
> + ret = 0;
> + }
> + rcu_read_unlock();
> +
> + return ret;
> +}
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v10 perf, bpf-next 2/9] sync tools/include/uapi/linux/perf_event.h
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 3/9] perf, bpf: introduce PERF_RECORD_BPF_EVENT Song Liu
` (6 subsequent siblings)
8 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Song Liu, peterz, acme, ast, daniel, kernel-team, dsahern
sync changes for PERF_RECORD_KSYMBOL
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/include/uapi/linux/perf_event.h | 26 +++++++++++++++++++++++++-
1 file changed, 25 insertions(+), 1 deletion(-)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 9de8780ac8d9..68c4da0227c5 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -372,7 +372,8 @@ struct perf_event_attr {
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
- __reserved_1 : 35;
+ ksymbol : 1, /* include ksymbol events */
+ __reserved_1 : 34;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -965,9 +966,32 @@ enum perf_event_type {
*/
PERF_RECORD_NAMESPACES = 16,
+ /*
+ * Record ksymbol register/unregister events:
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 addr;
+ * u32 len;
+ * u16 ksym_type;
+ * u16 flags;
+ * char name[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_KSYMBOL = 17,
+
PERF_RECORD_MAX, /* non-ABI */
};
+enum perf_record_ksymbol_type {
+ PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
+ PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
+ PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
+};
+
+#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
+
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MAX_CONTEXTS_PER_STACK 8
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v10 perf, bpf-next 3/9] perf, bpf: introduce PERF_RECORD_BPF_EVENT
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 1/9] perf, bpf: Introduce PERF_RECORD_KSYMBOL Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 2/9] sync tools/include/uapi/linux/perf_event.h Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-17 13:09 ` Peter Zijlstra
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 4/9] sync tools/include/uapi/linux/perf_event.h Song Liu
` (5 subsequent siblings)
8 siblings, 1 reply; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Song Liu, peterz, acme, ast, daniel, kernel-team, dsahern
For better performance analysis of BPF programs, this patch introduces
PERF_RECORD_BPF_EVENT, a new perf_event_type that exposes BPF program
load/unload information to user space.
Each BPF program may contain up to BPF_MAX_SUBPROGS (256) sub programs.
The following example shows kernel symbols for a BPF program with 7
sub programs:
ffffffffa0257cf9 t bpf_prog_b07ccb89267cf242_F
ffffffffa02592e1 t bpf_prog_2dcecc18072623fc_F
ffffffffa025b0e9 t bpf_prog_bb7a405ebaec5d5c_F
ffffffffa025dd2c t bpf_prog_a7540d4a39ec1fc7_F
ffffffffa025fcca t bpf_prog_05762d4ade0e3737_F
ffffffffa026108f t bpf_prog_db4bd11e35df90d4_F
ffffffffa0263f00 t bpf_prog_89d64e4abf0f0126_F
ffffffffa0257cf9 t bpf_prog_ae31629322c4b018__dummy_tracepoi
When a bpf program is loaded, PERF_RECORD_KSYMBOL is generated for
each of these sub programs. Therefore, PERF_RECORD_BPF_EVENT is not
needed for simple profiling.
For annotation, user space need to listen to PERF_RECORD_BPF_EVENT
and gather more information about these (sub) programs via sys_bpf.
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
include/linux/filter.h | 7 ++
include/linux/perf_event.h | 6 ++
include/uapi/linux/perf_event.h | 29 +++++++-
kernel/bpf/core.c | 2 +-
kernel/bpf/syscall.c | 2 +
kernel/events/core.c | 120 ++++++++++++++++++++++++++++++++
6 files changed, 164 insertions(+), 2 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ad106d845b22..d531d4250bff 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -951,6 +951,7 @@ bpf_address_lookup(unsigned long addr, unsigned long *size,
void bpf_prog_kallsyms_add(struct bpf_prog *fp);
void bpf_prog_kallsyms_del(struct bpf_prog *fp);
+void bpf_get_prog_name(const struct bpf_prog *prog, char *sym);
#else /* CONFIG_BPF_JIT */
@@ -1006,6 +1007,12 @@ static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
{
}
+
+static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+{
+ sym[0] = '\0';
+}
+
#endif /* CONFIG_BPF_JIT */
void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 77b2560f2dc7..0b539a2e21af 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1119,6 +1119,9 @@ typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
bool unregister,
perf_ksymbol_get_name_f get_name, void *data);
+extern void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1346,6 +1349,9 @@ static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
bool unregister,
perf_ksymbol_get_name_f get_name,
void *data) { }
+static inline void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 68c4da0227c5..8bd78a34e396 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -373,7 +373,8 @@ struct perf_event_attr {
write_backward : 1, /* Write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- __reserved_1 : 34;
+ bpf_event : 1, /* include bpf events */
+ __reserved_1 : 33;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -981,6 +982,25 @@ enum perf_event_type {
*/
PERF_RECORD_KSYMBOL = 17,
+ /*
+ * Record bpf events:
+ * enum perf_bpf_event_type {
+ * PERF_BPF_EVENT_UNKNOWN = 0,
+ * PERF_BPF_EVENT_PROG_LOAD = 1,
+ * PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ * };
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u16 type;
+ * u16 flags;
+ * u32 id;
+ * u8 tag[BPF_TAG_SIZE];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_BPF_EVENT = 18,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -992,6 +1012,13 @@ enum perf_record_ksymbol_type {
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
+enum perf_bpf_event_type {
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
+};
+
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MAX_CONTEXTS_PER_STACK 8
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f908b9356025..19c49313c709 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -495,7 +495,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
*symbol_end = addr + hdr->pages * PAGE_SIZE;
}
-static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
const char *end = sym + KSYM_NAME_LEN;
const struct btf_type *type;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..30ebd085790b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1211,6 +1211,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
bpf_prog_kallsyms_del_all(prog);
@@ -1554,6 +1555,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
}
bpf_prog_kallsyms_add(prog);
+ perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
return err;
free_used_maps:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ef27f2776999..2f238a8ddaab 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly;
+static atomic_t nr_bpf_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@@ -4308,6 +4309,8 @@ static void unaccount_event(struct perf_event *event)
dec = true;
if (event->attr.ksymbol)
atomic_dec(&nr_ksymbol_events);
+ if (event->attr.bpf_event)
+ atomic_dec(&nr_bpf_events);
if (dec) {
if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -7744,6 +7747,121 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
}
+/*
+ * bpf program load/unload tracking
+ */
+
+struct perf_bpf_event {
+ struct bpf_prog *prog;
+ struct {
+ struct perf_event_header header;
+ u16 type;
+ u16 flags;
+ u32 id;
+ u8 tag[BPF_TAG_SIZE];
+ } event_id;
+};
+
+static int perf_event_bpf_match(struct perf_event *event)
+{
+ return event->attr.bpf_event;
+}
+
+static void perf_event_bpf_output(struct perf_event *event, void *data)
+{
+ struct perf_bpf_event *bpf_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ if (!perf_event_bpf_match(event))
+ return;
+
+ perf_event_header__init_id(&bpf_event->event_id.header,
+ &sample, event);
+ ret = perf_output_begin(&handle, event,
+ bpf_event->event_id.header.size);
+ if (ret)
+ return;
+
+ perf_output_put(&handle, bpf_event->event_id);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
+static int perf_event_bpf_get_name(char *name, int len, void *data)
+{
+ struct bpf_prog *prog = data;
+
+ bpf_get_prog_name(prog, name);
+ return 0;
+}
+
+static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
+ enum perf_bpf_event_type type)
+{
+ bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
+ int i;
+
+ if (prog->aux->func_cnt == 0) {
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)prog->bpf_func,
+ prog->jited_len, unregister,
+ perf_event_bpf_get_name, prog);
+ } else {
+ for (i = 0; i < prog->aux->func_cnt; i++) {
+ struct bpf_prog *subprog = prog->aux->func[i];
+
+ perf_event_ksymbol(
+ PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)subprog->bpf_func,
+ subprog->jited_len, unregister,
+ perf_event_bpf_get_name, subprog);
+ }
+ }
+}
+
+void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags)
+{
+ struct perf_bpf_event bpf_event;
+
+ if (type <= PERF_BPF_EVENT_UNKNOWN ||
+ type >= PERF_BPF_EVENT_MAX)
+ return;
+
+ switch (type) {
+ case PERF_BPF_EVENT_PROG_LOAD:
+ case PERF_BPF_EVENT_PROG_UNLOAD:
+ if (atomic_read(&nr_ksymbol_events))
+ perf_event_bpf_emit_ksymbols(prog, type);
+ break;
+ default:
+ break;
+ }
+
+ if (!atomic_read(&nr_bpf_events))
+ return;
+
+ bpf_event = (struct perf_bpf_event){
+ .prog = prog,
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_BPF_EVENT,
+ .size = sizeof(bpf_event.event_id),
+ },
+ .type = type,
+ .flags = flags,
+ .id = prog->aux->id,
+ },
+ };
+
+ memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
+ perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
+}
+
void perf_event_itrace_started(struct perf_event *event)
{
event->attach_state |= PERF_ATTACH_ITRACE;
@@ -9996,6 +10114,8 @@ static void account_event(struct perf_event *event)
inc = true;
if (event->attr.ksymbol)
atomic_inc(&nr_ksymbol_events);
+ if (event->attr.bpf_event)
+ atomic_inc(&nr_bpf_events);
if (inc) {
/*
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 3/9] perf, bpf: introduce PERF_RECORD_BPF_EVENT
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 3/9] perf, bpf: introduce PERF_RECORD_BPF_EVENT Song Liu
@ 2019-01-17 13:09 ` Peter Zijlstra
2019-01-17 13:49 ` Song Liu
0 siblings, 1 reply; 20+ messages in thread
From: Peter Zijlstra @ 2019-01-17 13:09 UTC (permalink / raw)
To: Song Liu; +Cc: linux-kernel, netdev, acme, ast, daniel, kernel-team, dsahern
On Wed, Jan 16, 2019 at 08:29:25AM -0800, Song Liu wrote:
> + /*
> + * Record bpf events:
> + * enum perf_bpf_event_type {
> + * PERF_BPF_EVENT_UNKNOWN = 0,
> + * PERF_BPF_EVENT_PROG_LOAD = 1,
> + * PERF_BPF_EVENT_PROG_UNLOAD = 2,
> + * };
> + *
> + * struct {
> + * struct perf_event_header header;
> + * u16 type;
> + * u16 flags;
> + * u32 id;
> + * u8 tag[BPF_TAG_SIZE];
This does forever fix BPF_TAG_SIZE; is that intentional? We could easily
make that a variable length field like with the other event. Or is that
value already part of the eBPF ABI?
> + * struct sample_id sample_id;
> + * };
> + */
> + PERF_RECORD_BPF_EVENT = 18,
> @@ -7744,6 +7747,121 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
> WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
> }
>
> +struct perf_bpf_event {
> + struct bpf_prog *prog;
> + struct {
> + struct perf_event_header header;
> + u16 type;
> + u16 flags;
> + u32 id;
> + u8 tag[BPF_TAG_SIZE];
> + } event_id;
> +};
> +static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
> + enum perf_bpf_event_type type)
> +{
> + bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
> + int i;
> +
> + if (prog->aux->func_cnt == 0) {
> + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
> + (u64)(unsigned long)prog->bpf_func,
> + prog->jited_len, unregister,
> + perf_event_bpf_get_name, prog);
> + } else {
> + for (i = 0; i < prog->aux->func_cnt; i++) {
> + struct bpf_prog *subprog = prog->aux->func[i];
> +
> + perf_event_ksymbol(
> + PERF_RECORD_KSYMBOL_TYPE_BPF,
> + (u64)(unsigned long)subprog->bpf_func,
> + subprog->jited_len, unregister,
> + perf_event_bpf_get_name, subprog);
> + }
> + }
> +}
I still think this is a weird place to do this.. :-) See them patches I
just send.
> +void perf_event_bpf_event(struct bpf_prog *prog,
> + enum perf_bpf_event_type type,
> + u16 flags)
> +{
> + struct perf_bpf_event bpf_event;
> +
> + if (type <= PERF_BPF_EVENT_UNKNOWN ||
> + type >= PERF_BPF_EVENT_MAX)
> + return;
> +
> + switch (type) {
> + case PERF_BPF_EVENT_PROG_LOAD:
> + case PERF_BPF_EVENT_PROG_UNLOAD:
> + if (atomic_read(&nr_ksymbol_events))
> + perf_event_bpf_emit_ksymbols(prog, type);
> + break;
> + default:
> + break;
> + }
> +
> + if (!atomic_read(&nr_bpf_events))
> + return;
> +
> + bpf_event = (struct perf_bpf_event){
> + .prog = prog,
> + .event_id = {
> + .header = {
> + .type = PERF_RECORD_BPF_EVENT,
> + .size = sizeof(bpf_event.event_id),
> + },
> + .type = type,
> + .flags = flags,
> + .id = prog->aux->id,
> + },
> + };
BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
> + memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
> + perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
> +}
Anyway, small nits only:
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 3/9] perf, bpf: introduce PERF_RECORD_BPF_EVENT
2019-01-17 13:09 ` Peter Zijlstra
@ 2019-01-17 13:49 ` Song Liu
0 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2019-01-17 13:49 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: lkml, netdev, acme, ast, daniel, Kernel Team, dsahern
Thanks Peter!
> On Jan 17, 2019, at 5:09 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Jan 16, 2019 at 08:29:25AM -0800, Song Liu wrote:
>> + /*
>> + * Record bpf events:
>> + * enum perf_bpf_event_type {
>> + * PERF_BPF_EVENT_UNKNOWN = 0,
>> + * PERF_BPF_EVENT_PROG_LOAD = 1,
>> + * PERF_BPF_EVENT_PROG_UNLOAD = 2,
>> + * };
>> + *
>> + * struct {
>> + * struct perf_event_header header;
>> + * u16 type;
>> + * u16 flags;
>> + * u32 id;
>> + * u8 tag[BPF_TAG_SIZE];
>
> This does forever fix BPF_TAG_SIZE; is that intentional? We could easily
> make that a variable length field like with the other event. Or is that
> value already part of the eBPF ABI?
Yes, BPF_TAG_SIZE is already part of eBPF ABI.
Song
>
>> + * struct sample_id sample_id;
>> + * };
>> + */
>> + PERF_RECORD_BPF_EVENT = 18,
>> @@ -7744,6 +7747,121 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
>> WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
>> }
>>
>> +struct perf_bpf_event {
>> + struct bpf_prog *prog;
>> + struct {
>> + struct perf_event_header header;
>> + u16 type;
>> + u16 flags;
>> + u32 id;
>> + u8 tag[BPF_TAG_SIZE];
>> + } event_id;
>> +};
>
>> +static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
>> + enum perf_bpf_event_type type)
>> +{
>> + bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
>> + int i;
>> +
>> + if (prog->aux->func_cnt == 0) {
>> + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
>> + (u64)(unsigned long)prog->bpf_func,
>> + prog->jited_len, unregister,
>> + perf_event_bpf_get_name, prog);
>> + } else {
>> + for (i = 0; i < prog->aux->func_cnt; i++) {
>> + struct bpf_prog *subprog = prog->aux->func[i];
>> +
>> + perf_event_ksymbol(
>> + PERF_RECORD_KSYMBOL_TYPE_BPF,
>> + (u64)(unsigned long)subprog->bpf_func,
>> + subprog->jited_len, unregister,
>> + perf_event_bpf_get_name, subprog);
>> + }
>> + }
>> +}
>
> I still think this is a weird place to do this.. :-) See them patches I
> just send.
>
>> +void perf_event_bpf_event(struct bpf_prog *prog,
>> + enum perf_bpf_event_type type,
>> + u16 flags)
>> +{
>> + struct perf_bpf_event bpf_event;
>> +
>> + if (type <= PERF_BPF_EVENT_UNKNOWN ||
>> + type >= PERF_BPF_EVENT_MAX)
>> + return;
>> +
>> + switch (type) {
>> + case PERF_BPF_EVENT_PROG_LOAD:
>> + case PERF_BPF_EVENT_PROG_UNLOAD:
>> + if (atomic_read(&nr_ksymbol_events))
>> + perf_event_bpf_emit_ksymbols(prog, type);
>> + break;
>> + default:
>> + break;
>> + }
>> +
>> + if (!atomic_read(&nr_bpf_events))
>> + return;
>> +
>> + bpf_event = (struct perf_bpf_event){
>> + .prog = prog,
>> + .event_id = {
>> + .header = {
>> + .type = PERF_RECORD_BPF_EVENT,
>> + .size = sizeof(bpf_event.event_id),
>> + },
>> + .type = type,
>> + .flags = flags,
>> + .id = prog->aux->id,
>> + },
>> + };
>
> BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
>
>> + memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
>> + perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
>> +}
>
> Anyway, small nits only:
>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v10 perf, bpf-next 4/9] sync tools/include/uapi/linux/perf_event.h
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
` (2 preceding siblings ...)
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 3/9] perf, bpf: introduce PERF_RECORD_BPF_EVENT Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 5/9] perf util: handle PERF_RECORD_KSYMBOL Song Liu
` (4 subsequent siblings)
8 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Song Liu, peterz, acme, ast, daniel, kernel-team, dsahern
sync for PERF_RECORD_BPF_EVENT
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/include/uapi/linux/perf_event.h | 29 ++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 68c4da0227c5..8bd78a34e396 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -373,7 +373,8 @@ struct perf_event_attr {
write_backward : 1, /* Write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- __reserved_1 : 34;
+ bpf_event : 1, /* include bpf events */
+ __reserved_1 : 33;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -981,6 +982,25 @@ enum perf_event_type {
*/
PERF_RECORD_KSYMBOL = 17,
+ /*
+ * Record bpf events:
+ * enum perf_bpf_event_type {
+ * PERF_BPF_EVENT_UNKNOWN = 0,
+ * PERF_BPF_EVENT_PROG_LOAD = 1,
+ * PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ * };
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u16 type;
+ * u16 flags;
+ * u32 id;
+ * u8 tag[BPF_TAG_SIZE];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_BPF_EVENT = 18,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -992,6 +1012,13 @@ enum perf_record_ksymbol_type {
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
+enum perf_bpf_event_type {
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
+};
+
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MAX_CONTEXTS_PER_STACK 8
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v10 perf, bpf-next 5/9] perf util: handle PERF_RECORD_KSYMBOL
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
` (3 preceding siblings ...)
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 4/9] sync tools/include/uapi/linux/perf_event.h Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 6/9] perf util: handle PERF_RECORD_BPF_EVENT Song Liu
` (3 subsequent siblings)
8 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Song Liu, peterz, acme, ast, daniel, kernel-team, dsahern
This patch handles PERF_RECORD_KSYMBOL in perf record/report.
Specifically, a map and symbol are created on ksymbol register, and
removed on ksymbol unregister.
This patch also sets perf_event_attr.ksymbol properly. The flag is
ON by default.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/util/event.c | 21 +++++++++++++++
tools/perf/util/event.h | 20 ++++++++++++++
tools/perf/util/evsel.c | 10 ++++++-
tools/perf/util/evsel.h | 1 +
tools/perf/util/machine.c | 55 +++++++++++++++++++++++++++++++++++++++
tools/perf/util/machine.h | 3 +++
tools/perf/util/session.c | 4 +++
tools/perf/util/tool.h | 4 ++-
8 files changed, 116 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 937a5a4f71cc..3c8a6a8dd260 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -24,6 +24,7 @@
#include "symbol/kallsyms.h"
#include "asm/bug.h"
#include "stat.h"
+#include "session.h"
#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
@@ -45,6 +46,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_SWITCH] = "SWITCH",
[PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
[PERF_RECORD_NAMESPACES] = "NAMESPACES",
+ [PERF_RECORD_KSYMBOL] = "KSYMBOL",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -1329,6 +1331,14 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
return machine__process_switch_event(machine, event);
}
+int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_ksymbol(machine, event, sample);
+}
+
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -1461,6 +1471,14 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost);
}
+size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " ksymbol event with addr %lx len %u type %u flags 0x%x name %s\n",
+ event->ksymbol_event.addr, event->ksymbol_event.len,
+ event->ksymbol_event.ksym_type,
+ event->ksymbol_event.flags, event->ksymbol_event.name);
+}
+
size_t perf_event__fprintf(union perf_event *event, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -1496,6 +1514,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_LOST:
ret += perf_event__fprintf_lost(event, fp);
break;
+ case PERF_RECORD_KSYMBOL:
+ ret += perf_event__fprintf_ksymbol(event, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index eb95f3384958..018322f2a13e 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -5,6 +5,7 @@
#include <limits.h>
#include <stdio.h>
#include <linux/kernel.h>
+#include <linux/bpf.h>
#include "../perf.h"
#include "build-id.h"
@@ -84,6 +85,19 @@ struct throttle_event {
u64 stream_id;
};
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 256
+#endif
+
+struct ksymbol_event {
+ struct perf_event_header header;
+ u64 addr;
+ u32 len;
+ u16 ksym_type;
+ u16 flags;
+ char name[KSYM_NAME_LEN];
+};
+
#define PERF_SAMPLE_MASK \
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | \
PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \
@@ -651,6 +665,7 @@ union perf_event {
struct stat_round_event stat_round;
struct time_conv_event time_conv;
struct feature_event feat;
+ struct ksymbol_event ksymbol_event;
};
void perf_event__print_totals(void);
@@ -748,6 +763,10 @@ int perf_event__process_exit(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_ksymbol(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_tool__process_synth_event(struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
@@ -811,6 +830,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
int kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbc0466db368..9c8dc6d1aa7f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1035,6 +1035,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->mmap = track;
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
+ attr->ksymbol = track && !perf_missing_features.ksymbol;
if (opts->record_namespaces)
attr->namespaces = track;
@@ -1652,6 +1653,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(context_switch, p_unsigned);
PRINT_ATTRf(write_backward, p_unsigned);
PRINT_ATTRf(namespaces, p_unsigned);
+ PRINT_ATTRf(ksymbol, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
@@ -1811,6 +1813,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
PERF_SAMPLE_BRANCH_NO_CYCLES);
if (perf_missing_features.group_read && evsel->attr.inherit)
evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
+ if (perf_missing_features.ksymbol)
+ evsel->attr.ksymbol = 0;
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
@@ -1930,7 +1934,11 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
+ if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) {
+ perf_missing_features.ksymbol = true;
+ pr_debug2("switching off ksymbol\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
perf_missing_features.write_backward = true;
pr_debug2("switching off write_backward\n");
goto out_close;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 82a289ce8b0c..4a8c3e7f4808 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -168,6 +168,7 @@ struct perf_missing_features {
bool lbr_flags;
bool write_backward;
bool group_read;
+ bool ksymbol;
};
extern struct perf_missing_features perf_missing_features;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 143f7057d581..9bca61c7d5bf 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -681,6 +681,59 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
+static int machine__process_ksymbol_register(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct symbol *sym;
+ struct map *map;
+
+ map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
+ if (!map) {
+ map = dso__new_map(event->ksymbol_event.name);
+ if (!map)
+ return -ENOMEM;
+
+ map->start = event->ksymbol_event.addr;
+ map->pgoff = map->start;
+ map->end = map->start + event->ksymbol_event.len;
+ map_groups__insert(&machine->kmaps, map);
+ }
+
+ sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len,
+ 0, 0, event->ksymbol_event.name);
+ if (!sym)
+ return -ENOMEM;
+ dso__insert_symbol(map->dso, sym);
+ return 0;
+}
+
+static int machine__process_ksymbol_unregister(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct map *map;
+
+ map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
+ if (map)
+ map_groups__remove(&machine->kmaps, map);
+
+ return 0;
+}
+
+int machine__process_ksymbol(struct machine *machine __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (dump_trace)
+ perf_event__fprintf_ksymbol(event, stdout);
+
+ if (event->ksymbol_event.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
+ return machine__process_ksymbol_unregister(machine, event,
+ sample);
+ return machine__process_ksymbol_register(machine, event, sample);
+}
+
static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename)
{
const char *dup_filename;
@@ -1812,6 +1865,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
ret = machine__process_switch_event(machine, event); break;
+ case PERF_RECORD_KSYMBOL:
+ ret = machine__process_ksymbol(machine, event, sample); break;
default:
ret = -1;
break;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a5d1da60f751..4ecd380ce1b4 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -130,6 +130,9 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
+int machine__process_ksymbol(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5456c84c7dd1..2efa75bb0c0a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -376,6 +376,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->itrace_start = perf_event__process_itrace_start;
if (tool->context_switch == NULL)
tool->context_switch = perf_event__process_switch;
+ if (tool->ksymbol == NULL)
+ tool->ksymbol = perf_event__process_ksymbol;
if (tool->read == NULL)
tool->read = process_event_sample_stub;
if (tool->throttle == NULL)
@@ -1305,6 +1307,8 @@ static int machines__deliver_event(struct machines *machines,
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
return tool->context_switch(tool, event, sample, machine);
+ case PERF_RECORD_KSYMBOL:
+ return tool->ksymbol(tool, event, sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 56e4ca54020a..9c81ca2f3cf7 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -53,7 +53,9 @@ struct perf_tool {
itrace_start,
context_switch,
throttle,
- unthrottle;
+ unthrottle,
+ ksymbol;
+
event_attr_op attr;
event_attr_op event_update;
event_op2 tracing_data;
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v10 perf, bpf-next 6/9] perf util: handle PERF_RECORD_BPF_EVENT
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
` (4 preceding siblings ...)
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 5/9] perf util: handle PERF_RECORD_KSYMBOL Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 7/9] perf tools: synthesize PERF_RECORD_* for loaded BPF programs Song Liu
` (2 subsequent siblings)
8 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Song Liu, peterz, acme, ast, daniel, kernel-team, dsahern
This patch adds basic handling of PERF_RECORD_BPF_EVENT.
Tracking of PERF_RECORD_BPF_EVENT is OFF by default. Option --bpf-event
is added to turn it on.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/builtin-record.c | 1 +
tools/perf/perf.h | 1 +
tools/perf/util/Build | 2 ++
tools/perf/util/bpf-event.c | 15 +++++++++++++++
tools/perf/util/bpf-event.h | 11 +++++++++++
tools/perf/util/event.c | 20 ++++++++++++++++++++
tools/perf/util/event.h | 16 ++++++++++++++++
tools/perf/util/evsel.c | 11 ++++++++++-
tools/perf/util/evsel.h | 1 +
tools/perf/util/machine.c | 3 +++
tools/perf/util/session.c | 4 ++++
tools/perf/util/tool.h | 3 ++-
12 files changed, 86 insertions(+), 2 deletions(-)
create mode 100644 tools/perf/util/bpf-event.c
create mode 100644 tools/perf/util/bpf-event.h
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 882285fb9f64..deaf9b902094 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1839,6 +1839,7 @@ static struct option __record_options[] = {
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
"synthesize non-sample events at the end of output"),
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
+ OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"),
OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
"Fail if the specified frequency can't be used"),
OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 388c6dd128b8..5941fb6eccfc 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -66,6 +66,7 @@ struct record_opts {
bool ignore_missing_thread;
bool strict_freq;
bool sample_id;
+ bool bpf_event;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index af72be7f5b3b..fa8305390315 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -152,6 +152,8 @@ endif
libperf-y += perf-hooks.o
+libperf-$(CONFIG_LIBBPF) += bpf-event.o
+
libperf-$(CONFIG_CXX) += c++/
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
new file mode 100644
index 000000000000..87004706874f
--- /dev/null
+++ b/tools/perf/util/bpf-event.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <bpf/bpf.h>
+#include "bpf-event.h"
+#include "debug.h"
+#include "symbol.h"
+
+int machine__process_bpf_event(struct machine *machine __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_bpf_event(event, stdout);
+ return 0;
+}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
new file mode 100644
index 000000000000..d5ca355dd298
--- /dev/null
+++ b/tools/perf/util/bpf-event.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BPF_EVENT_H
+#define __PERF_BPF_EVENT_H
+
+#include "machine.h"
+
+int machine__process_bpf_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
+
+#endif
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 3c8a6a8dd260..3b646d27374e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -25,6 +25,7 @@
#include "asm/bug.h"
#include "stat.h"
#include "session.h"
+#include "bpf-event.h"
#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
@@ -47,6 +48,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
[PERF_RECORD_NAMESPACES] = "NAMESPACES",
[PERF_RECORD_KSYMBOL] = "KSYMBOL",
+ [PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -1339,6 +1341,14 @@ int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
return machine__process_ksymbol(machine, event, sample);
}
+int perf_event__process_bpf_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_bpf_event(machine, event, sample);
+}
+
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -1479,6 +1489,13 @@ size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
event->ksymbol_event.flags, event->ksymbol_event.name);
}
+size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " bpf event with type %u, flags %u, id %u\n",
+ event->bpf_event.type, event->bpf_event.flags,
+ event->bpf_event.id);
+}
+
size_t perf_event__fprintf(union perf_event *event, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -1517,6 +1534,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_KSYMBOL:
ret += perf_event__fprintf_ksymbol(event, fp);
break;
+ case PERF_RECORD_BPF_EVENT:
+ ret += perf_event__fprintf_bpf_event(event, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 018322f2a13e..dad32b81fe71 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -98,6 +98,16 @@ struct ksymbol_event {
char name[KSYM_NAME_LEN];
};
+struct bpf_event {
+ struct perf_event_header header;
+ u16 type;
+ u16 flags;
+ u32 id;
+
+ /* for bpf_prog types */
+ u8 tag[BPF_TAG_SIZE]; // prog tag
+};
+
#define PERF_SAMPLE_MASK \
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | \
PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \
@@ -666,6 +676,7 @@ union perf_event {
struct time_conv_event time_conv;
struct feature_event feat;
struct ksymbol_event ksymbol_event;
+ struct bpf_event bpf_event;
};
void perf_event__print_totals(void);
@@ -767,6 +778,10 @@ int perf_event__process_ksymbol(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_bpf_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_tool__process_synth_event(struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
@@ -831,6 +846,7 @@ size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
int kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9c8dc6d1aa7f..684c893ca6bc 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1036,6 +1036,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
attr->ksymbol = track && !perf_missing_features.ksymbol;
+ attr->bpf_event = track && opts->bpf_event &&
+ !perf_missing_features.bpf_event;
if (opts->record_namespaces)
attr->namespaces = track;
@@ -1654,6 +1656,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(write_backward, p_unsigned);
PRINT_ATTRf(namespaces, p_unsigned);
PRINT_ATTRf(ksymbol, p_unsigned);
+ PRINT_ATTRf(bpf_event, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
@@ -1815,6 +1818,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
if (perf_missing_features.ksymbol)
evsel->attr.ksymbol = 0;
+ if (perf_missing_features.bpf_event)
+ evsel->attr.bpf_event = 0;
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
@@ -1934,7 +1939,11 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) {
+ if (!perf_missing_features.bpf_event && evsel->attr.bpf_event) {
+ perf_missing_features.bpf_event = true;
+ pr_debug2("switching off bpf_event\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) {
perf_missing_features.ksymbol = true;
pr_debug2("switching off ksymbol\n");
goto fallback_missing_features;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4a8c3e7f4808..29c5eb68c44b 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -169,6 +169,7 @@ struct perf_missing_features {
bool write_backward;
bool group_read;
bool ksymbol;
+ bool bpf_event;
};
extern struct perf_missing_features perf_missing_features;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9bca61c7d5bf..ae85106bb5bf 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -21,6 +21,7 @@
#include "unwind.h"
#include "linux/hash.h"
#include "asm/bug.h"
+#include "bpf-event.h"
#include "sane_ctype.h"
#include <symbol/kallsyms.h>
@@ -1867,6 +1868,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_switch_event(machine, event); break;
case PERF_RECORD_KSYMBOL:
ret = machine__process_ksymbol(machine, event, sample); break;
+ case PERF_RECORD_BPF_EVENT:
+ ret = machine__process_bpf_event(machine, event, sample); break;
default:
ret = -1;
break;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 2efa75bb0c0a..026bf04bba74 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -378,6 +378,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->context_switch = perf_event__process_switch;
if (tool->ksymbol == NULL)
tool->ksymbol = perf_event__process_ksymbol;
+ if (tool->bpf_event == NULL)
+ tool->bpf_event = perf_event__process_bpf_event;
if (tool->read == NULL)
tool->read = process_event_sample_stub;
if (tool->throttle == NULL)
@@ -1309,6 +1311,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->context_switch(tool, event, sample, machine);
case PERF_RECORD_KSYMBOL:
return tool->ksymbol(tool, event, sample, machine);
+ case PERF_RECORD_BPF_EVENT:
+ return tool->bpf_event(tool, event, sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 9c81ca2f3cf7..250391672f9f 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -54,7 +54,8 @@ struct perf_tool {
context_switch,
throttle,
unthrottle,
- ksymbol;
+ ksymbol,
+ bpf_event;
event_attr_op attr;
event_attr_op event_update;
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v10 perf, bpf-next 7/9] perf tools: synthesize PERF_RECORD_* for loaded BPF programs
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
` (5 preceding siblings ...)
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 6/9] perf util: handle PERF_RECORD_BPF_EVENT Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 8/9] perf top: Synthesize BPF events for pre-existing " Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 9/9] bpf: add module name [bpf] to ksymbols for bpf programs Song Liu
8 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Song Liu, peterz, acme, ast, daniel, kernel-team, dsahern
This patch synthesizes PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for
BPF programs loaded before perf-record. This is achieved by gathering
information about all BPF programs via sys_bpf.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/builtin-record.c | 6 +
tools/perf/util/bpf-event.c | 241 ++++++++++++++++++++++++++++++++++++
tools/perf/util/bpf-event.h | 5 +
3 files changed, 252 insertions(+)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index deaf9b902094..88ea11d57c6f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -41,6 +41,7 @@
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
+#include "util/bpf-event.h"
#include "asm/bug.h"
#include <errno.h>
@@ -1082,6 +1083,11 @@ static int record__synthesize(struct record *rec, bool tail)
return err;
}
+ err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
+ machine, opts);
+ if (err < 0)
+ pr_warning("Couldn't synthesize bpf events.\n");
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address,
1);
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 87004706874f..126f9728a756 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -1,10 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <linux/btf.h>
#include "bpf-event.h"
#include "debug.h"
#include "symbol.h"
+#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
+
+static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
+{
+ int ret = 0;
+ size_t i;
+
+ for (i = 0; i < len; i++)
+ ret += snprintf(buf + ret, size - ret, "%02x", data[i]);
+ return ret;
+}
+
int machine__process_bpf_event(struct machine *machine __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused)
@@ -13,3 +27,230 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused,
perf_event__fprintf_bpf_event(event, stdout);
return 0;
}
+
+/*
+ * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
+ * program. One PERF_RECORD_BPF_EVENT is generated for the program. And
+ * one PERF_RECORD_KSYMBOL is generated for each sub program.
+ *
+ * Returns:
+ * 0 for success;
+ * -1 for failures;
+ * -2 for lack of kernel support.
+ */
+static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine,
+ int fd,
+ union perf_event *event,
+ struct record_opts *opts)
+{
+ struct ksymbol_event *ksymbol_event = &event->ksymbol_event;
+ struct bpf_event *bpf_event = &event->bpf_event;
+ u32 sub_prog_cnt, i, func_info_rec_size;
+ u8 (*prog_tags)[BPF_TAG_SIZE] = NULL;
+ struct bpf_prog_info info = {};
+ u32 info_len = sizeof(info);
+ void *func_infos = NULL;
+ u64 *prog_addrs = NULL;
+ struct btf *btf = NULL;
+ u32 *prog_lens = NULL;
+ bool has_btf = false;
+ char errbuf[512];
+ int err = 0;
+
+ /* Call bpf_obj_get_info_by_fd() to get sizes of arrays */
+ err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+
+ if (err) {
+ pr_debug("%s: failed to get BPF program info: %s, aborting\n",
+ __func__, str_error_r(errno, errbuf, sizeof(errbuf)));
+ return -1;
+ }
+ if (info_len < offsetof(struct bpf_prog_info, prog_tags)) {
+ pr_debug("%s: the kernel is too old, aborting\n", __func__);
+ return -2;
+ }
+
+ /* number of ksyms, func_lengths, and tags should match */
+ sub_prog_cnt = info.nr_jited_ksyms;
+ if (sub_prog_cnt != info.nr_prog_tags ||
+ sub_prog_cnt != info.nr_jited_func_lens)
+ return -1;
+
+ /* check BTF func info support */
+ if (info.btf_id && info.nr_func_info && info.func_info_rec_size) {
+ /* btf func info number should be same as sub_prog_cnt */
+ if (sub_prog_cnt != info.nr_func_info) {
+ pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
+ return -1;
+ }
+ if (btf__get_from_id(info.btf_id, &btf)) {
+ pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id);
+ return -1;
+ }
+ func_info_rec_size = info.func_info_rec_size;
+ func_infos = calloc(sub_prog_cnt, func_info_rec_size);
+ if (!func_infos) {
+ pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__);
+ return -1;
+ }
+ has_btf = true;
+ }
+
+ /*
+ * We need address, length, and tag for each sub program.
+ * Allocate memory and call bpf_obj_get_info_by_fd() again
+ */
+ prog_addrs = calloc(sub_prog_cnt, sizeof(u64));
+ if (!prog_addrs) {
+ pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__);
+ goto out;
+ }
+ prog_lens = calloc(sub_prog_cnt, sizeof(u32));
+ if (!prog_lens) {
+ pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__);
+ goto out;
+ }
+ prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE);
+ if (!prog_tags) {
+ pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__);
+ goto out;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.nr_jited_ksyms = sub_prog_cnt;
+ info.nr_jited_func_lens = sub_prog_cnt;
+ info.nr_prog_tags = sub_prog_cnt;
+ info.jited_ksyms = ptr_to_u64(prog_addrs);
+ info.jited_func_lens = ptr_to_u64(prog_lens);
+ info.prog_tags = ptr_to_u64(prog_tags);
+ info_len = sizeof(info);
+ if (has_btf) {
+ info.nr_func_info = sub_prog_cnt;
+ info.func_info_rec_size = func_info_rec_size;
+ info.func_info = ptr_to_u64(func_infos);
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ if (err) {
+ pr_debug("%s: failed to get BPF program info, aborting\n", __func__);
+ goto out;
+ }
+
+ /* Synthesize PERF_RECORD_KSYMBOL */
+ for (i = 0; i < sub_prog_cnt; i++) {
+ const struct bpf_func_info *finfo;
+ const char *short_name = NULL;
+ const struct btf_type *t;
+ int name_len;
+
+ *ksymbol_event = (struct ksymbol_event){
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = sizeof(struct ksymbol_event),
+ },
+ .addr = prog_addrs[i],
+ .len = prog_lens[i],
+ .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+ .flags = 0,
+ };
+ name_len = snprintf(ksymbol_event->name, KSYM_NAME_LEN,
+ "bpf_prog_");
+ name_len += snprintf_hex(ksymbol_event->name + name_len,
+ KSYM_NAME_LEN - name_len,
+ prog_tags[i], BPF_TAG_SIZE);
+ if (has_btf) {
+ finfo = func_infos + i * info.func_info_rec_size;
+ t = btf__type_by_id(btf, finfo->type_id);
+ short_name = btf__name_by_offset(btf, t->name_off);
+ } else if (i == 0 && sub_prog_cnt == 1) {
+ /* no subprog */
+ if (info.name[0])
+ short_name = info.name;
+ } else
+ short_name = "F";
+ if (short_name)
+ name_len += snprintf(ksymbol_event->name + name_len,
+ KSYM_NAME_LEN - name_len,
+ "_%s", short_name);
+
+ ksymbol_event->header.size += PERF_ALIGN(name_len + 1,
+ sizeof(u64));
+ err = perf_tool__process_synth_event(tool, event,
+ machine, process);
+ }
+
+ /* Synthesize PERF_RECORD_BPF_EVENT */
+ if (opts->bpf_event) {
+ *bpf_event = (struct bpf_event){
+ .header = {
+ .type = PERF_RECORD_BPF_EVENT,
+ .size = sizeof(struct bpf_event),
+ },
+ .type = PERF_BPF_EVENT_PROG_LOAD,
+ .flags = 0,
+ .id = info.id,
+ };
+ memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE);
+ err = perf_tool__process_synth_event(tool, event,
+ machine, process);
+ }
+
+out:
+ free(prog_tags);
+ free(prog_lens);
+ free(prog_addrs);
+ free(func_infos);
+ free(btf);
+ return err ? -1 : 0;
+}
+
+int perf_event__synthesize_bpf_events(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine,
+ struct record_opts *opts)
+{
+ union perf_event *event;
+ __u32 id = 0;
+ int err;
+ int fd;
+
+ event = malloc(sizeof(event->bpf_event) + KSYM_NAME_LEN);
+ if (!event)
+ return -1;
+ while (true) {
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ pr_debug("%s: can't get next program: %s%s",
+ __func__, strerror(errno),
+ errno == EINVAL ? " -- kernel too old?" : "");
+ /* don't report error on old kernel */
+ err = (errno == EINVAL) ? 0 : -1;
+ break;
+ }
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ pr_debug("%s: failed to get fd for prog_id %u\n",
+ __func__, id);
+ continue;
+ }
+
+ err = perf_event__synthesize_one_bpf_prog(tool, process,
+ machine, fd,
+ event, opts);
+ close(fd);
+ if (err) {
+ /* do not return error for old kernel */
+ if (err == -2)
+ err = 0;
+ break;
+ }
+ }
+ free(event);
+ return err;
+}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index d5ca355dd298..38aee4040f12 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -8,4 +8,9 @@ int machine__process_bpf_event(struct machine *machine,
union perf_event *event,
struct perf_sample *sample);
+int perf_event__synthesize_bpf_events(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine,
+ struct record_opts *opts);
+
#endif
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v10 perf, bpf-next 8/9] perf top: Synthesize BPF events for pre-existing loaded BPF programs
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
` (6 preceding siblings ...)
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 7/9] perf tools: synthesize PERF_RECORD_* for loaded BPF programs Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 9/9] bpf: add module name [bpf] to ksymbols for bpf programs Song Liu
8 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Arnaldo Carvalho de Melo, peterz, acme, ast, daniel, kernel-team,
dsahern, Song Liu
From: Arnaldo Carvalho de Melo <acme@redhat.com>
So that we can resolve symbols and map names.
Cc: Song Liu <songliubraving@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: kernel-team@fb.com
Cc: netdev@vger.kernel.org
Link: https://lkml.kernel.org/n/tip-h3loibff6htedj43q7uinon0@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/builtin-top.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f64e312db787..5a486d4de56e 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -22,6 +22,7 @@
#include "perf.h"
#include "util/annotate.h"
+#include "util/bpf-event.h"
#include "util/config.h"
#include "util/color.h"
#include "util/drv_configs.h"
@@ -1215,6 +1216,12 @@ static int __cmd_top(struct perf_top *top)
init_process_thread(top);
+ ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process,
+ &top->session->machines.host,
+ &top->record_opts);
+ if (ret < 0)
+ pr_warning("Couldn't synthesize bpf events.\n");
+
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->threads, false,
top->nr_threads_synthesize);
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v10 perf, bpf-next 9/9] bpf: add module name [bpf] to ksymbols for bpf programs
2019-01-16 16:29 [PATCH v10 perf, bpf-next 0/9] reveal invisible bpf programs Song Liu
` (7 preceding siblings ...)
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 8/9] perf top: Synthesize BPF events for pre-existing " Song Liu
@ 2019-01-16 16:29 ` Song Liu
2019-01-17 13:10 ` Peter Zijlstra
8 siblings, 1 reply; 20+ messages in thread
From: Song Liu @ 2019-01-16 16:29 UTC (permalink / raw)
To: linux-kernel, netdev
Cc: Song Liu, peterz, acme, ast, daniel, kernel-team, dsahern
With this patch, /proc/kallsyms will show BPF programs as
<addr> t bpf_prog_<tag>_<name> [bpf]
Signed-off-by: Song Liu <songliubraving@fb.com>
---
kernel/kallsyms.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index f3a04994e063..14934afa9e68 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -494,7 +494,7 @@ static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
static int get_ksymbol_bpf(struct kallsym_iter *iter)
{
- iter->module_name[0] = '\0';
+ strlcpy(iter->module_name, "bpf", MODULE_NAME_LEN);
iter->exported = 0;
return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
&iter->value, &iter->type,
--
2.17.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v10 perf, bpf-next 9/9] bpf: add module name [bpf] to ksymbols for bpf programs
2019-01-16 16:29 ` [PATCH v10 perf, bpf-next 9/9] bpf: add module name [bpf] to ksymbols for bpf programs Song Liu
@ 2019-01-17 13:10 ` Peter Zijlstra
0 siblings, 0 replies; 20+ messages in thread
From: Peter Zijlstra @ 2019-01-17 13:10 UTC (permalink / raw)
To: Song Liu; +Cc: linux-kernel, netdev, acme, ast, daniel, kernel-team, dsahern
On Wed, Jan 16, 2019 at 08:29:31AM -0800, Song Liu wrote:
> With this patch, /proc/kallsyms will show BPF programs as
>
> <addr> t bpf_prog_<tag>_<name> [bpf]
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> kernel/kallsyms.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index f3a04994e063..14934afa9e68 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -494,7 +494,7 @@ static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
>
> static int get_ksymbol_bpf(struct kallsym_iter *iter)
> {
> - iter->module_name[0] = '\0';
> + strlcpy(iter->module_name, "bpf", MODULE_NAME_LEN);
> iter->exported = 0;
> return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
> &iter->value, &iter->type,
> --
> 2.17.1
>
^ permalink raw reply [flat|nested] 20+ messages in thread