From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752948AbbGWJnT (ORCPT ); Thu, 23 Jul 2015 05:43:19 -0400
Received: from szxga03-in.huawei.com ([119.145.14.66]:9385 "EHLO szxga03-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752339AbbGWJnD (ORCPT ); Thu, 23 Jul 2015 05:43:03 -0400
From: Kaixu Xia
To: , , , , , ,
CC: , , , ,
Subject: [PATCH v3 1/3] bpf: Add new bpf map type to store the pointer to struct perf_event
Date: Thu, 23 Jul 2015 09:42:40 +0000
Message-ID: <1437644562-84431-2-git-send-email-xiakaixu@huawei.com>
X-Mailer: git-send-email 1.8.3.4
In-Reply-To: <1437644562-84431-1-git-send-email-xiakaixu@huawei.com>
References: <1437644562-84431-1-git-send-email-xiakaixu@huawei.com>
MIME-Version: 1.0
Content-Type: text/plain
X-Originating-IP: [10.107.193.250]
X-CFilter-Loop: Reflected
X-Mirapoint-Virus-RAPID-Raw: score=unknown(0), refid=str=0001.0A020206.55B0B723.01CB,ss=1,re=0.000,recu=0.000,reip=0.000,cl=1,cld=1,fgs=0, ip=0.0.0.0, so=2013-05-26 15:14:31, dmn=2013-03-21 17:37:32
X-Mirapoint-Loop-Id: e3af7e0c184850c87efb64bbba23925c
Sender: linux-kernel-owner@vger.kernel.org
List-ID:
X-Mailing-List: linux-kernel@vger.kernel.org

Introduce a new bpf map type, 'BPF_MAP_TYPE_PERF_EVENT_ARRAY'.
This map only stores pointers to struct perf_event. User space
passes in event FDs obtained from the perf_event_open() syscall;
each FD is converted to a pointer to struct perf_event, which is
then stored in the map.
Signed-off-by: Kaixu Xia
---
 include/linux/bpf.h        |   2 +
 include/linux/perf_event.h |   2 +
 include/uapi/linux/bpf.h   |   1 +
 kernel/bpf/arraymap.c      | 146 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c      |  15 +++++
 kernel/events/core.c       |  30 ++++++++++
 6 files changed, 196 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4383476..9cf74c0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -10,6 +10,7 @@
 #include <uapi/linux/bpf.h>
 #include <linux/workqueue.h>
 #include <linux/file.h>
+#include <linux/perf_event.h>
 
 struct bpf_map;
 
@@ -143,6 +144,7 @@ struct bpf_array {
 	union {
 		char value[0] __aligned(8);
 		struct bpf_prog *prog[0] __aligned(8);
+		struct perf_event *events[0] __aligned(8);
 	};
 };
 #define MAX_TAIL_CALL_CNT 32
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2027809..2ea4067 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -641,6 +641,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
+extern struct perf_event *perf_event_get(unsigned int fd);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
@@ -979,6 +980,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
+static inline struct perf_event *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
 static inline void perf_event_print_debug(void)				{ }
 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 29ef6f9..69a1f6b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -114,6 +114,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_HASH,
 	BPF_MAP_TYPE_ARRAY,
 	BPF_MAP_TYPE_PROG_ARRAY,
+	BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index cb31229..e97efbc 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -255,3 +255,149 @@ static int __init register_prog_array_map(void)
 	return 0;
 }
 late_initcall(register_prog_array_map);
+
+/* Allocate a perf_event_array map.
+ *
+ * User space passes a perf event fd (u32) as the value; update_elem
+ * converts it to a struct perf_event pointer, so value_size must be
+ * sizeof(u32).  Returns ERR_PTR(-EINVAL) for any other value_size.
+ */
+static struct bpf_map *perf_event_array_map_alloc(union bpf_attr *attr)
+{
+	if (attr->value_size != sizeof(u32))
+		return ERR_PTR(-EINVAL);
+
+	return array_map_alloc(attr);
+}
+
+/* Free the map: drop the reference held on every stored event, then
+ * release the array itself.
+ */
+static void perf_event_array_map_free(struct bpf_map *map)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct perf_event *event;
+	u32 i;
+
+	synchronize_rcu();
+
+	/* release the struct perf_event in perf_event_array_map */
+	for (i = 0; i < array->map.max_entries; i++) {
+		event = array->events[i];
+		if (event)
+			perf_event_release_kernel(event);
+	}
+	kvfree(array);
+}
+
+/* key iteration is not supported for this map type */
+static int perf_event_array_map_get_next_key(struct bpf_map *map, void *key,
+					     void *next_key)
+{
+	return -EINVAL;
+}
+
+/* lookup always returns NULL for this map type */
+static void *perf_event_array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return NULL;
+}
+
+/* Convert the perf event fd pointed to by @value into a referenced
+ * struct perf_event pointer.
+ *
+ * Returns NULL if the fd is not a perf event or if the event type is
+ * not supported.  On success the caller owns the reference taken by
+ * perf_event_get().
+ */
+static struct perf_event *convert_map_with_perf_event(void *value)
+{
+	struct perf_event *event;
+	u32 fd;
+
+	fd = *(u32 *)value;
+
+	event = perf_event_get(fd);
+	if (IS_ERR(event))
+		return NULL;
+
+	/* limit the event type to PERF_TYPE_RAW
+	 * and PERF_TYPE_HARDWARE.
+	 */
+	if (event->attr.type != PERF_TYPE_RAW &&
+	    event->attr.type != PERF_TYPE_HARDWARE) {
+		/* drop the reference taken by perf_event_get() */
+		perf_event_release_kernel(event);
+		return NULL;
+	}
+
+	return event;
+}
+
+/* Store the perf event referred to by the fd in @value at index @key.
+ *
+ * Only called from syscall.  A slot can be filled once: -EINVAL is
+ * returned if it is already occupied.  Other errors: -EINVAL for
+ * unsupported @map_flags, -E2BIG for an out-of-range index, -EBADF if
+ * the fd cannot be converted to a usable event.
+ */
+static int perf_event_array_map_update_elem(struct bpf_map *map, void *key,
+					    void *value, u64 map_flags)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct perf_event *event;
+	u32 index = *(u32 *)key;
+
+	if (map_flags != BPF_ANY)
+		return -EINVAL;
+
+	if (index >= array->map.max_entries)
+		return -E2BIG;
+
+	/* check if the value is already stored */
+	if (array->events[index])
+		return -EINVAL;
+
+	/* convert the fd to the pointer to struct perf_event */
+	event = convert_map_with_perf_event(value);
+	if (!event)
+		return -EBADF;
+
+	/* a concurrent update may have filled the slot since the check
+	 * above; install only if it is still empty so that no stored
+	 * event is overwritten and its reference leaked
+	 */
+	if (cmpxchg(array->events + index, NULL, event)) {
+		perf_event_release_kernel(event);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* deletion is not supported for this map type */
+static int perf_event_array_map_delete_elem(struct bpf_map *map, void *key)
+{
+	return -EINVAL;
+}
+
+static const struct bpf_map_ops perf_event_array_ops = {
+	.map_alloc = perf_event_array_map_alloc,
+	.map_free = perf_event_array_map_free,
+	.map_get_next_key = perf_event_array_map_get_next_key,
+	.map_lookup_elem = perf_event_array_map_lookup_elem,
+	.map_update_elem = perf_event_array_map_update_elem,
+	.map_delete_elem = perf_event_array_map_delete_elem,
+};
+
+static struct bpf_map_type_list perf_event_array_type __read_mostly = {
+	.ops = &perf_event_array_ops,
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+};
+
+static int __init register_perf_event_array_map(void)
+{
+	bpf_register_map_type(&perf_event_array_type);
+	return 0;
+}
+late_initcall(register_perf_event_array_map);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 039d866..c70f7e7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -924,6 +924,21 @@ static int check_call(struct verifier_env *env, int func_id)
 		 */
 		return -EINVAL;
 
+	if (map && map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+	    func_id != BPF_FUNC_perf_event_read)
+		/* perf_event_array map type needs extra care:
+		 * only allow to pass it into bpf_perf_event_read() for now.
+		 * bpf_map_update/delete_elem() must only be done via syscall
+		 */
+		return -EINVAL;
+
+	if (func_id == BPF_FUNC_perf_event_read &&
+	    map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+		/* don't allow any other map type to be passed into
+		 * bpf_perf_event_read()
+		 */
+		return -EINVAL;
+
 	return 0;
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3dae34..08cb467 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8574,6 +8574,36 @@ void perf_event_delayed_put(struct task_struct *task)
 		WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
+/*
+ * Look up the perf event behind @fd and take a reference on it.
+ *
+ * Returns the event on success, ERR_PTR(-EBADF) if @fd is not an open
+ * file, or ERR_PTR(-EINVAL) if it is not a perf event file.  The
+ * caller owns the reference and has to release it.
+ */
+struct perf_event *perf_event_get(unsigned int fd)
+{
+	struct perf_event *event;
+	struct fd f;
+
+	f = fdget(fd);
+
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	if (f.file->f_op != &perf_fops) {
+		fdput(f);
+		return ERR_PTR(-EINVAL);
+	}
+
+	event = f.file->private_data;
+
+	atomic_long_inc(&event->refcount);
+	fdput(f);
+
+	return event;
+}
+
 /*
  * inherit a event from parent task to child task:
  */
-- 
1.8.3.4