From 70da9057d7fa7dda76e1b0861b8a0174078434ea Mon Sep 17 00:00:00 2001
From: Eugene Loh
Date: Fri, 25 Jun 2021 15:18:46 -0700
Subject: [PATCH] bpf: Adjust BPF stack helper functions to accommodate skip > 0

Let's say that the caller has storage for num_elem stack frames.  Then,
the BPF stack helper functions walk the stack for only num_elem frames.
This means that if skip > 0, only num_elem - skip frames are kept.

Change the computation of init_nr so that num_elem + skip stack frames
are walked (and hence num_elem frames are kept).
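
For illustration, suppose sysctl_perf_event_max_stack is at its default
of 127, the caller has room for num_elem = 4 frames, and skip = 2.
Today init_nr = 127 - 4 = 123, so at most 4 frames are walked and at
most 4 - 2 = 2 are left after skipping.  With this change,
init_nr = 127 - 4 - 2 = 121, so up to 6 frames are walked and the
caller can still get the full 4 frames after skipping.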

[NB: I am unsure about the bpf_get_stack*_pe() variants, which in the
case of __PERF_SAMPLE_CALLCHAIN_EARLY use ctx->data->callchain, which
was walked earlier.  It is unclear to me how init_nr was chosen for
that case.]

Change the comment on bpf_get_stack() in the header file to be more
explicit about what the return value means.

Signed-off-by: Eugene Loh
---
 include/uapi/linux/bpf.h |  4 ++--
 kernel/bpf/stackmap.c    | 26 +++++++++++++++-----------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 79c893310492..7c7b93e1db90 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2183,8 +2183,8 @@ union bpf_attr {
  *
  *			# sysctl kernel.perf_event_max_stack=
  *	Return
- *		A non-negative value equal to or less than *size* on success,
- *		or a negative error in case of failure.
+ *		The non-negative copied *buf* length equal to or less than
+ *		*size* on success, or a negative error in case of failure.
  *
  * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
  *	Description
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index be35bfb7fb13..e2a193581550 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -249,23 +249,30 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
 #endif
 }
 
+static u32 get_init_nr(u32 nelem, u64 flags)
+{
+	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+	if (sysctl_perf_event_max_stack <= nelem + skip)
+		return 0;
+	else
+		return sysctl_perf_event_max_stack - nelem - skip;
+}
+
 static long __bpf_get_stackid(struct bpf_map *map,
 			      struct perf_callchain_entry *trace, u64 flags)
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
 	u32 max_depth = map->value_size / stack_map_data_size(map);
-	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
-	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+	u32 init_nr;
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	u32 hash, id, trace_nr, trace_len;
 	bool user = flags & BPF_F_USER_STACK;
 	u64 *ips;
 	bool hash_matches;
 
-	/* get_perf_callchain() guarantees that trace->nr >= init_nr
-	 * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
-	 */
+	init_nr = get_init_nr(max_depth, flags);
 	trace_nr = trace->nr - init_nr;
 
 	if (trace_nr <= skip)
@@ -331,8 +338,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags)
 {
 	u32 max_depth = map->value_size / stack_map_data_size(map);
-	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
-	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+	u32 init_nr;
 	bool user = flags & BPF_F_USER_STACK;
 	struct perf_callchain_entry *trace;
 	bool kernel = !user;
@@ -341,6 +347,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
 		return -EINVAL;
 
+	init_nr = get_init_nr(max_depth, flags);
 	trace = get_perf_callchain(regs, init_nr, kernel, user,
 				   sysctl_perf_event_max_stack, false, false);
 
@@ -458,10 +465,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 		goto err_fault;
 
 	num_elem = size / elem_size;
-	if (sysctl_perf_event_max_stack < num_elem)
-		init_nr = 0;
-	else
-		init_nr = sysctl_perf_event_max_stack - num_elem;
+	init_nr = get_init_nr(num_elem, flags);
 
 	if (trace_in)
 		trace = trace_in;
-- 
2.27.0