From: Kaixu Xia <xiakaixu@huawei.com>
To: <ast@plumgrid.com>, <davem@davemloft.net>, <acme@kernel.org>,
<mingo@redhat.com>, <a.p.zijlstra@chello.nl>,
<masami.hiramatsu.pt@hitachi.com>, <jolsa@kernel.org>
Cc: <xiakaixu@huawei.com>, <wangnan0@huawei.com>,
<linux-kernel@vger.kernel.org>, <pi3orama@163.com>,
<hekuang@huawei.com>
Subject: [PATCH v3 1/3] bpf: Add new bpf map type to store the pointer to struct perf_event
Date: Thu, 23 Jul 2015 09:42:40 +0000 [thread overview]
Message-ID: <1437644562-84431-2-git-send-email-xiakaixu@huawei.com> (raw)
In-Reply-To: <1437644562-84431-1-git-send-email-xiakaixu@huawei.com>
Introduce a new bpf map type 'BPF_MAP_TYPE_PERF_EVENT_ARRAY'.
This map stores only pointers to struct perf_event. The event FDs
that user space obtains from the perf_event_open() syscall are
converted to pointers to struct perf_event and stored in the map.
Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
---
include/linux/bpf.h | 2 +
include/linux/perf_event.h | 2 +
include/uapi/linux/bpf.h | 1 +
kernel/bpf/arraymap.c | 113 +++++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/verifier.c | 15 ++++++
kernel/events/core.c | 23 +++++++++
6 files changed, 156 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4383476..9cf74c0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -10,6 +10,7 @@
#include <uapi/linux/bpf.h>
#include <linux/workqueue.h>
#include <linux/file.h>
+#include <linux/perf_event.h>
struct bpf_map;
@@ -143,6 +144,7 @@ struct bpf_array {
union {
char value[0] __aligned(8);
struct bpf_prog *prog[0] __aligned(8);
+ struct perf_event *events[0] __aligned(8);
};
};
#define MAX_TAIL_CALL_CNT 32
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2027809..2ea4067 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -641,6 +641,7 @@ extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
+extern struct perf_event *perf_event_get(unsigned int fd);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
extern void perf_pmu_enable(struct pmu *pmu);
@@ -979,6 +980,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_delayed_put(struct task_struct *task) { }
+static struct perf_event *perf_event_get(unsigned int fd) { return NULL; }
static inline void perf_event_print_debug(void) { }
static inline int perf_event_task_disable(void) { return -EINVAL; }
static inline int perf_event_task_enable(void) { return -EINVAL; }
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 29ef6f9..69a1f6b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -114,6 +114,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_HASH,
BPF_MAP_TYPE_ARRAY,
BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
};
enum bpf_prog_type {
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index cb31229..e97efbc 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -255,3 +255,116 @@ static int __init register_prog_array_map(void)
return 0;
}
late_initcall(register_prog_array_map);
+
+/*
+ * Allocate a perf event array map.
+ *
+ * The value handed in on update is a u32 perf event file descriptor, so
+ * any other value_size is refused with ERR_PTR(-EINVAL). The allocation
+ * itself is delegated to array_map_alloc().
+ */
+static struct bpf_map *perf_event_array_map_alloc(union bpf_attr *attr)
+{
+	if (attr->value_size == sizeof(u32))
+		return array_map_alloc(attr);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Tear down a perf event array map: release every event still stored in
+ * it, then free the array itself.
+ */
+static void perf_event_array_map_free(struct bpf_map *map)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	int idx;
+
+	/* wait for an RCU grace period before dropping the stored events;
+	 * presumably so readers still using the map have finished
+	 */
+	synchronize_rcu();
+
+	for (idx = 0; idx < array->map.max_entries; idx++) {
+		struct perf_event *ev = array->events[idx];
+
+		/* empty slots hold NULL and need no release */
+		if (!ev)
+			continue;
+
+		perf_event_release_kernel(ev);
+	}
+
+	kvfree(array);
+}
+
+/* Key iteration is not offered by this map type: always fails with -EINVAL. */
+static int perf_event_array_map_get_next_key(struct bpf_map *map,
+					     void *key, void *next_key)
+{
+	return -EINVAL;
+}
+
+/* Element lookup never yields a value for this map type: always NULL. */
+static void *perf_event_array_map_lookup_elem(struct bpf_map *map,
+					      void *key)
+{
+	return NULL;
+}
+
+/*
+ * Resolve the perf event file descriptor stored in @value to its
+ * struct perf_event.
+ *
+ * @value: points to the u32 file descriptor supplied by the caller.
+ *
+ * Returns the event, still holding the reference taken by
+ * perf_event_get(), or NULL when the FD cannot be resolved or the event
+ * is neither PERF_TYPE_RAW nor PERF_TYPE_HARDWARE. No reference is held
+ * when NULL is returned.
+ */
+static struct perf_event *convert_map_with_perf_event(void *value)
+{
+	struct perf_event *event;
+	u32 fd = *(u32 *)value;
+
+	event = perf_event_get(fd);
+	/* treat NULL like an error pointer so it is never dereferenced below */
+	if (IS_ERR_OR_NULL(event))
+		return NULL;
+
+	/* limit the event type to PERF_TYPE_RAW
+	 * and PERF_TYPE_HARDWARE.
+	 */
+	if (event->attr.type != PERF_TYPE_RAW &&
+	    event->attr.type != PERF_TYPE_HARDWARE) {
+		/* rejected: give back the reference perf_event_get() took,
+		 * otherwise the event could never be freed
+		 */
+		perf_event_release_kernel(event);
+		return NULL;
+	}
+
+	return event;
+}
+
+/*
+ * Store the perf event behind the FD in @value at slot @key of the map.
+ * Only called from syscall.
+ *
+ * @map:       the perf event array map
+ * @key:       points to the u32 slot index
+ * @value:     points to the u32 perf event file descriptor
+ * @map_flags: must be BPF_ANY
+ *
+ * Returns 0 on success, -EINVAL if @map_flags is not BPF_ANY or the slot
+ * is already occupied, -E2BIG if the index is out of range, and -EBADF
+ * if the FD cannot be converted to a usable perf event.
+ */
+static int perf_event_array_map_update_elem(struct bpf_map *map, void *key,
+					    void *value, u64 map_flags)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct perf_event *event;
+	u32 index = *(u32 *)key;
+
+	if (map_flags != BPF_ANY)
+		return -EINVAL;
+
+	if (index >= array->map.max_entries)
+		return -E2BIG;
+
+	/* cheap early rejection of an occupied slot, before touching the FD */
+	if (array->events[index])
+		return -EINVAL;
+
+	/* convert the fd to the pointer to struct perf_event */
+	event = convert_map_with_perf_event(value);
+	if (!event)
+		return -EBADF;
+
+	/* Install only if the slot is still empty. A concurrent update may
+	 * have filled it since the check above; an unconditional xchg()
+	 * would overwrite that event and lose its reference.
+	 */
+	if (cmpxchg(array->events + index, NULL, event) != NULL) {
+		/* lost the race: drop the reference we just acquired */
+		perf_event_release_kernel(event);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Deleting an element is not offered by this map type: always -EINVAL. */
+static int perf_event_array_map_delete_elem(struct bpf_map *map,
+					    void *key)
+{
+	return -EINVAL;
+}
+
+/* Callback table for BPF_MAP_TYPE_PERF_EVENT_ARRAY maps. */
+static const struct bpf_map_ops perf_event_array_ops = {
+	/* lifetime */
+	.map_alloc		= perf_event_array_map_alloc,
+	.map_free		= perf_event_array_map_free,
+	/* element access */
+	.map_lookup_elem	= perf_event_array_map_lookup_elem,
+	.map_update_elem	= perf_event_array_map_update_elem,
+	.map_delete_elem	= perf_event_array_map_delete_elem,
+	.map_get_next_key	= perf_event_array_map_get_next_key,
+};
+
+/* Ties the BPF_MAP_TYPE_PERF_EVENT_ARRAY id to its callback table. */
+static struct bpf_map_type_list perf_event_array_type __read_mostly = {
+	.type	= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.ops	= &perf_event_array_ops,
+};
+
+/*
+ * Initcall that registers the perf event array map type. Always
+ * returns 0.
+ */
+static int __init register_perf_event_array_map(void)
+{
+	bpf_register_map_type(&perf_event_array_type);
+
+	return 0;
+}
+late_initcall(register_perf_event_array_map);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 039d866..c70f7e7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -924,6 +924,21 @@ static int check_call(struct verifier_env *env, int func_id)
*/
return -EINVAL;
+ if (map && map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+ func_id != BPF_FUNC_perf_event_read)
+ /* perf_event_array map type needs extra care:
+ * only allow to pass it into bpf_perf_event_read() for now.
+ * bpf_map_update/delete_elem() must only be done via syscall
+ */
+ return -EINVAL;
+
+ if (func_id == BPF_FUNC_perf_event_read &&
+ map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+ /* don't allow any other map type to be passed into
+ * bpf_perf_event_read()
+ */
+ return -EINVAL;
+
return 0;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3dae34..08cb467 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8574,6 +8574,29 @@ void perf_event_delayed_put(struct task_struct *task)
WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
}
+/*
+ * Look up the perf event behind file descriptor @fd and take a reference
+ * on it.
+ *
+ * Returns the event with event->refcount incremented, ERR_PTR(-EBADF) if
+ * @fd does not resolve to a file, or ERR_PTR(-EINVAL) if the file's f_op
+ * is not perf_fops.
+ */
+struct perf_event *perf_event_get(unsigned int fd)
+{
+	struct fd f = fdget(fd);
+	struct perf_event *event;
+
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	if (f.file->f_op == &perf_fops) {
+		event = f.file->private_data;
+		/* take the event reference before the file one is dropped */
+		atomic_long_inc(&event->refcount);
+	} else {
+		event = ERR_PTR(-EINVAL);
+	}
+
+	fdput(f);
+	return event;
+}
+
+
/*
* inherit a event from parent task to child task:
*/
--
1.8.3.4
next prev parent reply other threads:[~2015-07-23 9:43 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-23 9:42 [PATCH v3 0/3] bpf: Introduce the new ability of eBPF programs to access hardware PMU counter Kaixu Xia
2015-07-23 9:42 ` Kaixu Xia [this message]
2015-07-23 22:54 ` [PATCH v3 1/3] bpf: Add new bpf map type to store the pointer to struct perf_event Alexei Starovoitov
2015-07-24 2:22 ` xiakaixu
2015-07-24 2:26 ` Alexei Starovoitov
2015-08-03 9:38 ` Peter Zijlstra
2015-07-23 9:42 ` [PATCH v3 2/3] bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter Kaixu Xia
2015-07-23 22:56 ` Alexei Starovoitov
2015-07-24 1:57 ` xiakaixu
2015-08-03 9:34 ` Peter Zijlstra
2015-08-03 10:32 ` xiakaixu
2015-07-23 9:42 ` [PATCH v3 3/3] samples/bpf: example of get selected PMU counter value Kaixu Xia
2015-07-23 22:59 ` Alexei Starovoitov
2015-07-24 1:54 ` xiakaixu
2015-07-24 2:23 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1437644562-84431-2-git-send-email-xiakaixu@huawei.com \
--to=xiakaixu@huawei.com \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@kernel.org \
--cc=ast@plumgrid.com \
--cc=davem@davemloft.net \
--cc=hekuang@huawei.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=masami.hiramatsu.pt@hitachi.com \
--cc=mingo@redhat.com \
--cc=pi3orama@163.com \
--cc=wangnan0@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).