* [PATCH/RFC] Make perf_event_open() propagate errors for use in bpf_perf_event_open() @ 2019-01-11 15:55 Arnaldo Carvalho de Melo 2019-01-12 13:59 ` Jamal Hadi Salim 2019-01-18 14:52 ` Peter Zijlstra 0 siblings, 2 replies; 6+ messages in thread From: Arnaldo Carvalho de Melo @ 2019-01-11 15:55 UTC (permalink / raw) To: Peter Zijlstra Cc: Ingo Molnar, Alexei Starovoitov, Daniel Borkmann, Jamal Hadi Salim, Linux Kernel Mailing List, Linux Networking Development Mailing List Hi Peter, bpf_perf_event_open() already returns a value, but if perf_event_output's output_begin (mostly perf_output_begin) fails, the only way to know about that is looking before/after the rb->lost, right? For ring buffer users that is ok, we'll get a PERF_RECORD_LOST, etc, but for BPF programs it would be convenient to get that -ENOSPC and do some fallback, whatever makes sense, like in my augmented_syscalls stuff for 'perf trace', i.e. don't augment it (i.e. push stuff at the end of the normal payload), just don't filter the raw_syscalls:sys_enter, 'perf trace' will get the enter syscall enter event without the pointer dereference at the end, etc, warn the user but don't lose a syscall in the strace-like output. What do you think? Am I missing something? Probably ;-) Ah, its just test built. - Arnaldo diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1d5c551a5add..9ed2af2abd6d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,9 +978,9 @@ extern void perf_event_output_forward(struct perf_event *event, extern void perf_event_output_backward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); -extern void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs); +extern int perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); static inline bool is_default_overflow_handler(struct perf_event *event) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cd13a30f732..dcbb2b508034 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6489,7 +6489,7 @@ void perf_prepare_sample(struct perf_event_header *header, data->phys_addr = perf_virt_to_phys(data->addr); } -static __always_inline void +static __always_inline int __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, @@ -6499,13 +6499,15 @@ __perf_event_output(struct perf_event *event, { struct perf_output_handle handle; struct perf_event_header header; + int err; /* protect the callchain buffers */ rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (output_begin(&handle, event, header.size)) + err = output_begin(&handle, event, header.size); + if (err) goto exit; perf_output_sample(&handle, &header, data, event); @@ -6514,6 +6516,7 @@ __perf_event_output(struct perf_event *event, exit: rcu_read_unlock(); + return err; } void @@ -6532,12 +6535,12 @@ perf_event_output_backward(struct perf_event *event, __perf_event_output(event, data, regs, perf_output_begin_backward); } -void +int perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { - __perf_event_output(event, data, regs, perf_output_begin); + return __perf_event_output(event, data, regs, perf_output_begin); } /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..088c2032ceaf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -431,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_event_output(event, sd, regs); - return 0; + return perf_event_output(event, sd, regs); } BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 53c233370fae..9e9d4c66e53c 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -141,8 +141,8 @@ int sys_enter(struct syscall_enter_args *args) len = sizeof(augmented_args.args); } - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); - return 0; + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); } SEC("raw_syscalls:sys_exit") diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 2ae44813ef2d..b7dba114e36c 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -55,9 +55,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ len &= sizeof(augmented_args.filename.value) - 1; \ } \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, len); \ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ @@ -125,10 +125,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ /* addrlen = augmented_args.args.addrlen; */ \ /* */ \ probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c index b59e8812ee8c..550e69c2e8d1 100644 --- a/tools/perf/examples/bpf/etcsnoop.c +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -49,11 +49,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ args->filename_ptr); \ if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ return 0; \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ } struct syscall_enter_openat_args { ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH/RFC] Make perf_event_open() propagate errors for use in bpf_perf_event_open() 2019-01-11 15:55 [PATCH/RFC] Make perf_event_open() propagate errors for use in bpf_perf_event_open() Arnaldo Carvalho de Melo @ 2019-01-12 13:59 ` Jamal Hadi Salim 2019-01-18 14:52 ` Peter Zijlstra 1 sibling, 0 replies; 6+ messages in thread From: Jamal Hadi Salim @ 2019-01-12 13:59 UTC (permalink / raw) To: Arnaldo Carvalho de Melo, Peter Zijlstra Cc: Ingo Molnar, Alexei Starovoitov, Daniel Borkmann, Linux Kernel Mailing List, Linux Networking Development Mailing List On 2019-01-11 10:55 a.m., Arnaldo Carvalho de Melo wrote: > Hi Peter, > > bpf_perf_event_open() already returns a value, but if > perf_event_output's output_begin (mostly perf_output_begin) fails, > the only way to know about that is looking before/after the rb->lost, > right? > > For ring buffer users that is ok, we'll get a PERF_RECORD_LOST, > etc, but for BPF programs it would be convenient to get that -ENOSPC and > do some fallback, whatever makes sense, like in my augmented_syscalls > stuff for 'perf trace', i.e. don't augment it (i.e. push stuff at the > end of the normal payload), just don't filter the > raw_syscalls:sys_enter, 'perf trace' will get the enter syscall enter > event without the pointer dereference at the end, etc, warn the user but > don't lose a syscall in the strace-like output. > > What do you think? Am I missing something? Probably ;-) > > Ah, its just test built. Works as advertised ;-> Tested-by: Jamal Hadi Salim <jhs@mojatatu.com> cheers, jamal ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH/RFC] Make perf_event_open() propagate errors for use in bpf_perf_event_open() 2019-01-11 15:55 [PATCH/RFC] Make perf_event_open() propagate errors for use in bpf_perf_event_open() Arnaldo Carvalho de Melo 2019-01-12 13:59 ` Jamal Hadi Salim @ 2019-01-18 14:52 ` Peter Zijlstra 2019-01-18 15:09 ` Arnaldo Carvalho de Melo 1 sibling, 1 reply; 6+ messages in thread From: Peter Zijlstra @ 2019-01-18 14:52 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Ingo Molnar, Alexei Starovoitov, Daniel Borkmann, Jamal Hadi Salim, Linux Kernel Mailing List, Linux Networking Development Mailing List On Fri, Jan 11, 2019 at 12:55:38PM -0300, Arnaldo Carvalho de Melo wrote: > Hi Peter, > > bpf_perf_event_open() already returns a value, but if > perf_event_output's output_begin (mostly perf_output_begin) fails, > the only way to know about that is looking before/after the rb->lost, > right? > > For ring buffer users that is ok, we'll get a PERF_RECORD_LOST, > etc, but for BPF programs it would be convenient to get that -ENOSPC and > do some fallback, whatever makes sense, like in my augmented_syscalls > stuff for 'perf trace', i.e. don't augment it (i.e. push stuff at the > end of the normal payload), just don't filter the > raw_syscalls:sys_enter, 'perf trace' will get the enter syscall enter > event without the pointer dereference at the end, etc, warn the user but > don't lose a syscall in the strace-like output. > > What do you think? Am I missing something? Probably ;-) Seems ok to do this. ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH/RFC] Make perf_event_open() propagate errors for use in bpf_perf_event_open() 2019-01-18 14:52 ` Peter Zijlstra @ 2019-01-18 15:09 ` Arnaldo Carvalho de Melo 2019-01-18 16:19 ` Peter Zijlstra 2019-01-22 10:17 ` [tip:perf/core] perf: Make perf_event_output() propagate the output() return tip-bot for Arnaldo Carvalho de Melo 0 siblings, 2 replies; 6+ messages in thread From: Arnaldo Carvalho de Melo @ 2019-01-18 15:09 UTC (permalink / raw) To: Peter Zijlstra Cc: Ingo Molnar, Alexei Starovoitov, Daniel Borkmann, Jamal Hadi Salim, Clark Williams, Linux Kernel Mailing List, Linux Networking Development Mailing List Em Fri, Jan 18, 2019 at 03:52:37PM +0100, Peter Zijlstra escreveu: > On Fri, Jan 11, 2019 at 12:55:38PM -0300, Arnaldo Carvalho de Melo wrote: > > Hi Peter, > > > > bpf_perf_event_open() already returns a value, but if > > perf_event_output's output_begin (mostly perf_output_begin) fails, > > the only way to know about that is looking before/after the rb->lost, > > right? > > > > For ring buffer users that is ok, we'll get a PERF_RECORD_LOST, > > etc, but for BPF programs it would be convenient to get that -ENOSPC and > > do some fallback, whatever makes sense, like in my augmented_syscalls > > stuff for 'perf trace', i.e. don't augment it (i.e. push stuff at the > > end of the normal payload), just don't filter the > > raw_syscalls:sys_enter, 'perf trace' will get the enter syscall enter > > event without the pointer dereference at the end, etc, warn the user but > > don't lose a syscall in the strace-like output. > > > > What do you think? Am I missing something? Probably ;-) > > Seems ok to do this. Can I take that as an Acked-by? This is the patch with a cset comment, if you're ok, I'll put it together with Song's PERF_RECORD_{KSYMBOL,BPF_EVENT} into my perf/core branch and send to Ingo, please advise, - Arnaldo commit 1b3b3dee572d0b77a316ab6715091201be6832de Author: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Fri Jan 11 13:20:20 2019 -0300 perf: Make perf_event_output() propagate the output() return For the original mode of operation it isn't needed, since we report back errors via PERF_RECORD_LOST records in the ring buffer, but for use in bpf_perf_event_output() it is convenient to return the errors, basically -ENOSPC. Currently bpf_perf_event_output() returns an error indication, the last thing it does, which is to push it to the ring buffer is that can fail and if so, this failure won't be reported back to its users, fix it. Reported-by: Jamal Hadi Salim <jhs@mojatatu.com> Tested-by: Jamal Hadi Salim <jhs@mojatatu.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: https://lkml.kernel.org/r/20190111155538.GX22483@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1d5c551a5add..9ed2af2abd6d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,9 +978,9 @@ extern void perf_event_output_forward(struct perf_event *event, extern void perf_event_output_backward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); -extern void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs); +extern int perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); static inline bool is_default_overflow_handler(struct perf_event *event) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cd13a30f732..dcbb2b508034 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6489,7 +6489,7 @@ void perf_prepare_sample(struct perf_event_header *header, data->phys_addr = perf_virt_to_phys(data->addr); } -static __always_inline void +static __always_inline int __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, @@ -6499,13 +6499,15 @@ __perf_event_output(struct perf_event *event, { struct perf_output_handle handle; struct perf_event_header header; + int err; /* protect the callchain buffers */ rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (output_begin(&handle, event, header.size)) + err = output_begin(&handle, event, header.size); + if (err) goto exit; perf_output_sample(&handle, &header, data, event); @@ -6514,6 +6516,7 @@ __perf_event_output(struct perf_event *event, exit: rcu_read_unlock(); + return err; } void @@ -6532,12 +6535,12 @@ perf_event_output_backward(struct perf_event *event, __perf_event_output(event, data, regs, perf_output_begin_backward); } -void +int perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { - __perf_event_output(event, data, regs, perf_output_begin); + return __perf_event_output(event, data, regs, perf_output_begin); } /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..088c2032ceaf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -431,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_event_output(event, sd, regs); - return 0; + return perf_event_output(event, sd, regs); } BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 53c233370fae..9e9d4c66e53c 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -141,8 +141,8 @@ int sys_enter(struct syscall_enter_args *args) len = sizeof(augmented_args.args); } - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); - return 0; + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); } SEC("raw_syscalls:sys_exit") diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 2ae44813ef2d..b7dba114e36c 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -55,9 +55,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ len &= sizeof(augmented_args.filename.value) - 1; \ } \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, len); \ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ @@ -125,10 +125,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ /* addrlen = augmented_args.args.addrlen; */ \ /* */ \ probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c index b59e8812ee8c..550e69c2e8d1 100644 --- a/tools/perf/examples/bpf/etcsnoop.c +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -49,11 +49,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ args->filename_ptr); \ if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ return 0; \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ } struct syscall_enter_openat_args { ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH/RFC] Make perf_event_open() propagate errors for use in bpf_perf_event_open() 2019-01-18 15:09 ` Arnaldo Carvalho de Melo @ 2019-01-18 16:19 ` Peter Zijlstra 2019-01-22 10:17 ` [tip:perf/core] perf: Make perf_event_output() propagate the output() return tip-bot for Arnaldo Carvalho de Melo 1 sibling, 0 replies; 6+ messages in thread From: Peter Zijlstra @ 2019-01-18 16:19 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Ingo Molnar, Alexei Starovoitov, Daniel Borkmann, Jamal Hadi Salim, Clark Williams, Linux Kernel Mailing List, Linux Networking Development Mailing List On Fri, Jan 18, 2019 at 12:09:38PM -0300, Arnaldo Carvalho de Melo wrote: > commit 1b3b3dee572d0b77a316ab6715091201be6832de > Author: Arnaldo Carvalho de Melo <acme@redhat.com> > Date: Fri Jan 11 13:20:20 2019 -0300 > > perf: Make perf_event_output() propagate the output() return > > For the original mode of operation it isn't needed, since we report back > errors via PERF_RECORD_LOST records in the ring buffer, but for use in > bpf_perf_event_output() it is convenient to return the errors, basically > -ENOSPC. > > Currently bpf_perf_event_output() returns an error indication, the last > thing it does, which is to push it to the ring buffer is that can fail > and if so, this failure won't be reported back to its users, fix it. > > Reported-by: Jamal Hadi Salim <jhs@mojatatu.com> > Tested-by: Jamal Hadi Salim <jhs@mojatatu.com> > Cc: Adrian Hunter <adrian.hunter@intel.com> > Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com> > Cc: Daniel Borkmann <daniel@iogearbox.net> > Cc: Jiri Olsa <jolsa@kernel.org> > Cc: Namhyung Kim <namhyung@kernel.org> > Cc: Peter Zijlstra <peterz@infradead.org> > Link: https://lkml.kernel.org/r/20190111155538.GX22483@kernel.org > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> /me hates on git-format :-) Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> ^ permalink raw reply [flat|nested] 6+ messages in thread
* [tip:perf/core] perf: Make perf_event_output() propagate the output() return 2019-01-18 15:09 ` Arnaldo Carvalho de Melo 2019-01-18 16:19 ` Peter Zijlstra @ 2019-01-22 10:17 ` tip-bot for Arnaldo Carvalho de Melo 1 sibling, 0 replies; 6+ messages in thread From: tip-bot for Arnaldo Carvalho de Melo @ 2019-01-22 10:17 UTC (permalink / raw) To: linux-tip-commits Cc: linux-kernel, jolsa, peterz, tglx, alexei.starovoitov, daniel, acme, mingo, jhs, adrian.hunter, hpa, namhyung Commit-ID: 5620196951192f7cd2da0a04e7c0113f40bfc14e Gitweb: https://git.kernel.org/tip/5620196951192f7cd2da0a04e7c0113f40bfc14e Author: Arnaldo Carvalho de Melo <acme@redhat.com> AuthorDate: Fri, 11 Jan 2019 13:20:20 -0300 Committer: Arnaldo Carvalho de Melo <acme@redhat.com> CommitDate: Mon, 21 Jan 2019 17:00:57 -0300 perf: Make perf_event_output() propagate the output() return For the original mode of operation it isn't needed, since we report back errors via PERF_RECORD_LOST records in the ring buffer, but for use in bpf_perf_event_output() it is convenient to return the errors, basically -ENOSPC. Currently bpf_perf_event_output() returns an error indication, the last thing it does, which is to push it to the ring buffer is that can fail and if so, this failure won't be reported back to its users, fix it. Reported-by: Jamal Hadi Salim <jhs@mojatatu.com> Tested-by: Jamal Hadi Salim <jhs@mojatatu.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Link: https://lkml.kernel.org/r/20190118150938.GN5823@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- include/linux/perf_event.h | 6 +++--- kernel/events/core.c | 11 +++++++---- kernel/trace/bpf_trace.c | 3 +-- tools/perf/examples/bpf/augmented_raw_syscalls.c | 4 ++-- tools/perf/examples/bpf/augmented_syscalls.c | 14 +++++++------- tools/perf/examples/bpf/etcsnoop.c | 10 +++++----- 6 files changed, 25 insertions(+), 23 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f8ec36197718..4eb88065a9b5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,9 +978,9 @@ extern void perf_event_output_forward(struct perf_event *event, extern void perf_event_output_backward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); -extern void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs); +extern int perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); static inline bool is_default_overflow_handler(struct perf_event *event) diff --git a/kernel/events/core.c b/kernel/events/core.c index fbe59b793b36..bc525cd1615c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6489,7 +6489,7 @@ void perf_prepare_sample(struct perf_event_header *header, data->phys_addr = perf_virt_to_phys(data->addr); } -static __always_inline void +static __always_inline int __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, @@ -6499,13 +6499,15 @@ __perf_event_output(struct perf_event *event, { struct perf_output_handle handle; struct perf_event_header header; + int err; /* protect the callchain buffers */ rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (output_begin(&handle, event, header.size)) + err = output_begin(&handle, event, header.size); + if (err) goto exit; perf_output_sample(&handle, &header, data, event); @@ -6514,6 +6516,7 @@ __perf_event_output(struct perf_event *event, exit: rcu_read_unlock(); + return err; } void @@ -6532,12 +6535,12 @@ perf_event_output_backward(struct perf_event *event, __perf_event_output(event, data, regs, perf_output_begin_backward); } -void +int perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { - __perf_event_output(event, data, regs, perf_output_begin); + return __perf_event_output(event, data, regs, perf_output_begin); } /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..088c2032ceaf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -431,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_event_output(event, sd, regs); - return 0; + return perf_event_output(event, sd, regs); } BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 53c233370fae..9e9d4c66e53c 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -141,8 +141,8 @@ int sys_enter(struct syscall_enter_args *args) len = sizeof(augmented_args.args); } - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); - return 0; + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); } SEC("raw_syscalls:sys_exit") diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 2ae44813ef2d..b7dba114e36c 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -55,9 +55,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ len &= sizeof(augmented_args.filename.value) - 1; \ } \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, len); \ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ @@ -125,10 +125,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ /* addrlen = augmented_args.args.addrlen; */ \ /* */ \ probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c index b59e8812ee8c..550e69c2e8d1 100644 --- a/tools/perf/examples/bpf/etcsnoop.c +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -49,11 +49,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ args->filename_ptr); \ if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ return 0; \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ } struct syscall_enter_openat_args { ^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-01-22 10:18 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2019-01-11 15:55 [PATCH/RFC] Make perf_event_open() propagate errors for use in bpf_perf_event_open() Arnaldo Carvalho de Melo 2019-01-12 13:59 ` Jamal Hadi Salim 2019-01-18 14:52 ` Peter Zijlstra 2019-01-18 15:09 ` Arnaldo Carvalho de Melo 2019-01-18 16:19 ` Peter Zijlstra 2019-01-22 10:17 ` [tip:perf/core] perf: Make perf_event_output() propagate the output() return tip-bot for Arnaldo Carvalho de Melo
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).